From 5d9d7fcdb442ea82d2da19a15a98eca1349815e1 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 1 Nov 2023 14:25:21 +0000
Subject: [PATCH 01/14] added checks on workspace params

---
 src/ocl/convolutionocl.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp
index 1bfba9871d..d94819bd21 100644
--- a/src/ocl/convolutionocl.cpp
+++ b/src/ocl/convolutionocl.cpp
@@ -131,6 +131,16 @@ static inline void ValidateGroupCount(const TensorDescriptor& x,
         MIOPEN_THROW(miopenStatusBadParm, "Invalid group number");
 }
 
+static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) {
+  if (!workSpace && workSpaceSize != 0) {
+    MIOPEN_THROW(miopenStatusBadParm, "workspace size is > 0 but ptr is null");
+  }
+  if (workSpace && workSpaceSize == 0) {
+    MIOPEN_THROW(miopenStatusBadParm, "workspace size is 0 but ptr is non-null");
+  }
+  /// \todo could add a check here that workSpace points to GPU memory
+}
+
 static Invoker PrepareInvoker(ExecutionContext ctx,
                               const conv::ProblemDescription& problem,
                               const NetworkConfig& config,
@@ -260,6 +270,7 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,
                                                  size_t workSpaceSize,
                                                  bool exhaustiveSearch) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
     if(x == nullptr || w == nullptr || y == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
@@ -494,6 +505,7 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
                                                Data_t workSpace,
                                                size_t workSpaceSize) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
 
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
@@ -807,6 +819,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle,
                                                         const std::size_t workSpaceSize,
                                                         const solver::Id solver_id) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
 
@@ -841,6 +854,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle,
                                                      size_t workSpaceSize,
                                                      bool exhaustiveSearch) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
     if(dx == nullptr || w == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
@@ -937,6 +951,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle,
                                                     Data_t workSpace,
                                                     size_t workSpaceSize) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
 
     if(!(dyDesc.IsPacked() && wDesc.IsPacked() && dxDesc.IsPacked()))
@@ -1009,6 +1024,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle,
                                                          std::size_t workSpaceSize,
                                                          solver::Id solver_id) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
     auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};
 
@@ -1049,6 +1065,7 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle,
                                                         size_t workSpaceSize,
                                                         bool exhaustiveSearch) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
     if(x == nullptr || dw == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
@@ -1144,6 +1161,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle,
                                                        Data_t workSpace,
                                                        size_t workSpaceSize) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
     decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);
@@ -1207,6 +1225,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle,
                                                     std::size_t workSpaceSize,
                                                     solver::Id solver_id) const
 {
+    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
     auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);

From 75e18bb7c13e69a4ef3608ffee114f2703bccbdb Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 8 Nov 2023 15:57:40 +0000
Subject: [PATCH 02/14] addressed review comments

---
 src/ocl/convolutionocl.cpp | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp
index d94819bd21..413e11d789 100644
--- a/src/ocl/convolutionocl.cpp
+++ b/src/ocl/convolutionocl.cpp
@@ -131,14 +131,16 @@ static inline void ValidateGroupCount(const TensorDescriptor& x,
         MIOPEN_THROW(miopenStatusBadParm, "Invalid group number");
 }
 
-static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) {
-  if (!workSpace && workSpaceSize != 0) {
-    MIOPEN_THROW(miopenStatusBadParm, "workspace size is > 0 but ptr is null");
-  }
-  if (workSpace && workSpaceSize == 0) {
-    MIOPEN_THROW(miopenStatusBadParm, "workspace size is 0 but ptr is non-null");
-  }
-  /// \todo could add a check here that workSpace points to GPU memory
+static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize)
+{
+
+    bool x = (workSpace != nullptr);
+    bool y = (workSpaceSize != 0);
+
+    assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. Either both should "
+                                       "be zero or both should be non-zero");
+
+    /// \todo could add a check here that workSpace points to GPU memory
 }
 
 static Invoker PrepareInvoker(ExecutionContext ctx,
@@ -270,8 +272,8 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,
                                                  size_t workSpaceSize,
                                                  bool exhaustiveSearch) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(x == nullptr || w == nullptr || y == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -505,8 +507,8 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
                                                Data_t workSpace,
                                                size_t workSpaceSize) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
 
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
     ValidateTensors(tensors);
@@ -819,8 +821,8 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle,
                                                         const std::size_t workSpaceSize,
                                                         const solver::Id solver_id) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
 
     ValidateTensors(tensors);
@@ -854,8 +856,8 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle,
                                                      size_t workSpaceSize,
                                                      bool exhaustiveSearch) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(dx == nullptr || w == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -951,8 +953,8 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle,
                                                     Data_t workSpace,
                                                     size_t workSpaceSize) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
 
     if(!(dyDesc.IsPacked() && wDesc.IsPacked() && dxDesc.IsPacked()))
     {
@@ -1024,8 +1026,8 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle,
                                                          std::size_t workSpaceSize,
                                                          solver::Id solver_id) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};
 
     ValidateTensors(tensors);
@@ -1065,8 +1067,8 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle,
                                                         size_t workSpaceSize,
                                                         bool exhaustiveSearch) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(x == nullptr || dw == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -1161,8 +1163,8 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle,
                                                        Data_t workSpace,
                                                        size_t workSpaceSize) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);
     ValidateAlphaBeta(alpha, beta);
@@ -1225,8 +1227,8 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle,
                                                     std::size_t workSpaceSize,
                                                     solver::Id solver_id) const
 {
-    ValidateWorkspace(workSpace, workSpaceSize);
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);
 

From 9bb0750a7fcdada6a8897b30063d14e258ccb851 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Thu, 9 Nov 2023 00:28:22 +0000
Subject: [PATCH 03/14] fix release build warning

---
 src/ocl/convolutionocl.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp
index eab7649c20..2827ce62a8 100644
--- a/src/ocl/convolutionocl.cpp
+++ b/src/ocl/convolutionocl.cpp
@@ -134,8 +134,8 @@ static inline void ValidateGroupCount(const TensorDescriptor& x,
 static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize)
 {
 
-    bool x = (workSpace != nullptr);
-    bool y = (workSpaceSize != 0);
+    [[maybe_unused]] bool x = (workSpace != nullptr);
+    [[maybe_unused]] bool y = (workSpaceSize != 0);
 
     assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. Either both should "
                                        "be zero or both should be non-zero");

From 675b6a9e172d03712d79868a48353d5b0cb97b88 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Sat, 11 Nov 2023 15:50:22 +0000
Subject: [PATCH 04/14] WIP: workspace abstraction

---
 test/conv_common.hpp                    |  97 ++++++---------
 test/ctc.cpp                            |   9 +-
 test/find_2_conv.cpp                    |  14 +--
 test/find_db.cpp                        |  27 ++---
 test/gtest/conv_api_strided_tensors.cpp |  16 +--
 test/gtest/solver_bwd.hpp               |  11 +-
 test/gtest/solver_fwd.hpp               |  11 +-
 test/gtest/solver_wrw.hpp               |  11 +-
 test/pooling_common.hpp                 |  17 +--
 test/reduce_test.cpp                    |  57 +++++----
 test/rnn_vanilla_common.hpp             |  60 +++++-----
 test/workspace.hpp                      | 149 ++++++++++++++++++++++++
 12 files changed, 303 insertions(+), 176 deletions(-)
 create mode 100644 test/workspace.hpp

diff --git a/test/conv_common.hpp b/test/conv_common.hpp
index 60291b8d48..58904c2b65 100644
--- a/test/conv_common.hpp
+++ b/test/conv_common.hpp
@@ -25,6 +25,7 @@
  *******************************************************************************/
 #pragma once
 #include "test.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <iostream>
 #include <iterator>
@@ -320,13 +321,12 @@ struct conv_base
             EXPECT_EQUAL(miopenStatusSuccess,
                          miopenGetSolutionWorkspaceSize(solution, &workspace_size));
 
-            const auto workspace_dev = workspace_size != 0
-                                           ? get_handle().Write(std::vector<char>(workspace_size))
-                                           : nullptr;
+            Workspace wspace{};
+            wspace.resize(workspace_size);
 
             EXPECT_EQUAL(miopenStatusSuccess,
                          miopenRunSolution(
-                             handle, solution, 3, arguments, workspace_dev.get(), workspace_size));
+                             handle, solution, 3, arguments, wspace.ptr(), wspace.size()));
         }
 
         const auto& solution_deref = miopen::deref(solutions.front());
@@ -584,19 +584,6 @@ struct verify_forward_conv : conv_base<T, Tout>
         return rout;
     }
 
-    void resize_workspace(miopen::Handle& h,
-                          const std::size_t sz,
-                          std::vector<char>& ws,
-                          miopen::Allocator::ManageDataPtr& ws_dev) const
-    {
-        ws_dev.reset();
-        if(sz > 0)
-        {
-            ws.resize(sz);
-            ws_dev = h.Write(ws);
-        }
-    }
-
     tensor<Tout> gpu()
     {
         auto&& handle = get_handle();
@@ -617,8 +604,7 @@ struct verify_forward_conv : conv_base<T, Tout>
         bool fallback_path_taken      = false;
         std::size_t count             = 0;
 
-        std::vector<char> ws;
-        miopen::Allocator::ManageDataPtr ws_dev = nullptr;
+        Workspace wspace{};
 
         const auto ctx     = ExecutionContext{&handle};
         const auto problem = ConvProblemDescription{
@@ -638,7 +624,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
                     const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    resize_workspace(handle, workspace_size, ws, ws_dev);
+                    wspace.resize(workspace_size);
 
                     filter.FindConvBwdDataAlgorithm(handle,
                                                     input.desc,
@@ -650,8 +636,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    ws_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
@@ -682,7 +668,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                         std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size
                                   << " != " << ws_size << std::endl;
                 }
-                resize_workspace(handle, selected.workspace_size, ws, ws_dev);
+                wspace.resize(selected.workspace_size);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
@@ -693,8 +679,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     wei_dev.get(),
                                                     rout.desc,
                                                     out_dev.get(),
-                                                    ws_dev.get(),
-                                                    selected.workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     selected.solution_id);
             }
             else
@@ -704,7 +690,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
                     const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    resize_workspace(handle, workspace_size, ws, ws_dev);
+                    wspace.resize(workspace_size);
 
                     filter.FindConvFwdAlgorithm(handle,
                                                 input.desc,
@@ -716,8 +702,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                ws_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
                 }
 
@@ -749,7 +735,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                         std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size
                                   << " != " << ws_size << std::endl;
                 }
-                resize_workspace(handle, selected.workspace_size, ws, ws_dev);
+                wspace.resize(selected.workspace_size);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
@@ -760,8 +746,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                    in_dev.get(),
                                                    rout.desc,
                                                    out_dev.get(),
-                                                   ws_dev.get(),
-                                                   selected.workspace_size,
+                                                   wspace.ptr(),
+                                                   wspace.size(),
                                                    selected.solution_id);
             }
             break;
@@ -817,8 +803,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 if(api == ConvApi::Find_1_0)
                 {
                     const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    std::vector<char> workspace(workspace_size);
-                    auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+                    wspace.resize(workspace_size);
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -835,15 +820,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
 
-                    workspace_dev.reset();
                     if(perf.memory > 0)
                     {
-                        workspace.resize(perf.memory);
-                        workspace_dev = handle.Write(workspace);
+                        wspace.resize(perf.memory);
                     }
 
                     filter.ConvolutionForward(handle,
@@ -856,8 +839,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                               &beta,
                                               rout.desc,
                                               out_dev.get(),
-                                              workspace_dev.get(),
-                                              workspace_size);
+                                              wspace.ptr(),
+                                              wspace.size());
 
                     /// \ref read_solver_name
                     auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken);
@@ -889,8 +872,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 if(api == ConvApi::Find_1_0)
                 {
                     const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    std::vector<char> workspace(workspace_size);
-                    auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+                    wspace.resize(workspace_size);
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -909,15 +891,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                         1,
                                                         &ret_algo_count,
                                                         &perf,
-                                                        workspace_dev.get(),
-                                                        workspace_size,
+                                                        wspace.ptr(),
+                                                        wspace.size(),
                                                         search);
 
-                        workspace_dev.reset();
                         if(perf.memory > 0)
                         {
-                            workspace.resize(perf.memory);
-                            workspace_dev = handle.Write(workspace);
+                            wspace.resize(perf.memory);
                         }
 
                         filter.ConvolutionBackwardData(handle,
@@ -930,8 +910,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                        &beta,
                                                        rout.desc,
                                                        out_dev.get(),
-                                                       workspace_dev.get(),
-                                                       workspace_size);
+                                                       wspace.ptr(),
+                                                       wspace.size());
                     }
                     else
                     {
@@ -945,15 +925,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    workspace_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
 
-                        workspace_dev.reset();
                         if(perf.memory > 0)
                         {
-                            workspace.resize(perf.memory);
-                            workspace_dev = handle.Write(workspace);
+                            wspace.resize(perf.memory);
                         }
 
                         filter.ConvolutionForward(handle,
@@ -966,8 +944,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                   &beta,
                                                   rout.desc,
                                                   out_dev.get(),
-                                                  workspace_dev.get(),
-                                                  workspace_size);
+                                                  wspace.ptr(),
+                                                  wspace.size());
                     }
 
                     /// \ref read_solver_name
@@ -1118,8 +1096,7 @@ struct verify_backward_conv : conv_base<T>
         {
         case ConvApi::Immediate: {
             const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(workspace_size);
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1138,8 +1115,8 @@ struct verify_backward_conv : conv_base<T>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
diff --git a/test/ctc.cpp b/test/ctc.cpp
index 6393728957..e9e1713668 100644
--- a/test/ctc.cpp
+++ b/test/ctc.cpp
@@ -29,6 +29,7 @@
 #include "tensor_holder.hpp"
 #include "test.hpp"
 #include "verify.hpp"
+#include "workspace.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
 #include <array>
@@ -633,8 +634,8 @@ struct verify_ctcloss
                                                                    inputLengths.data(),
                                                                    miopenCTCLossAlgo_t(0));
 
-        auto workSpace     = tensor<T>{workSpaceSize / sizeof(T)};
-        auto workSpace_dev = handle.Write(workSpace.data);
+        Workspace wspace{};
+        wspace.resize(workSpaceSize);
 
         auto losses_gpu = losses;
         auto grads_gpu  = grads;
@@ -653,8 +654,8 @@ struct verify_ctcloss
                             grads.desc,
                             grads_dev.get(),
                             miopenCTCLossAlgo_t(0),
-                            workSpace_dev.get(),
-                            workSpaceSize);
+                            wspace.ptr(),
+                            wspace.size());
 
         losses_gpu.data = handle.Read<T>(losses_dev, losses_gpu.data.size());
         grads_gpu.data  = handle.Read<T>(grads_dev, grads_gpu.data.size());
diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp
index a89ce942a7..9148436ec7 100644
--- a/test/find_2_conv.cpp
+++ b/test/find_2_conv.cpp
@@ -27,6 +27,7 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "get_handle.hpp"
+#include "workspace.hpp"
 
 #include <miopen/miopen.h>
 
@@ -210,12 +211,11 @@ struct Find2Test : test_driver
                 }
 
                 const auto workspace_size = std::min(workspace_limit, workspace_max);
-                workspace_dev             = workspace_size != 0
-                                                ? miopen::deref(handle).Write(std::vector<char>(workspace_size))
-                                                : nullptr;
+                Workspace wspace{};
+                wspace.resize(workspace_size);
 
                 EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace(
-                                 options, workspace_dev.get(), workspace_size),
+                                 options, wspace.ptr(), wspace.size()), 
                              miopenStatusSuccess);
 
                 EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor(
@@ -318,8 +318,8 @@ struct Find2Test : test_driver
         EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size),
                      miopenStatusSuccess);
 
-        auto workspace_dev =
-            workspace_size != 0 ? handle_deref.Write(std::vector<char>(workspace_size)) : nullptr;
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) {
             auto arguments = std::make_unique<miopenTensorArgument_t[]>(num_arguments);
@@ -333,7 +333,7 @@ struct Find2Test : test_driver
 
             EXPECT_EQUAL(
                 miopenRunSolution(
-                    handle, solution, 3, arguments.get(), workspace_dev.get(), workspace_size),
+                    handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()),
                 miopenStatusSuccess);
         };
 
diff --git a/test/find_db.cpp b/test/find_db.cpp
index b5dd4b83d6..974136bc59 100644
--- a/test/find_db.cpp
+++ b/test/find_db.cpp
@@ -107,9 +107,8 @@ struct FindDbTest : test_driver
         const auto problem =
             conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData};
         const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -125,8 +124,8 @@ struct FindDbTest : test_driver
                                             1,
                                             &ret_algo_count,
                                             perf,
-                                            workspace_dev.get(),
-                                            workspace_size,
+                                            wspace.ptr(),
+                                            wspace.size(),
                                             false);
         };
 
@@ -141,9 +140,8 @@ struct FindDbTest : test_driver
         const auto problem =
             conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward};
         const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -159,8 +157,8 @@ struct FindDbTest : test_driver
                                         1,
                                         &ret_algo_count,
                                         perf,
-                                        workspace_dev.get(),
-                                        workspace_size,
+                                        wspace.ptr(),
+                                        wspace.size(),
                                         false);
         };
 
@@ -175,9 +173,8 @@ struct FindDbTest : test_driver
         const auto problem = conv::ProblemDescription{
             y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights};
         const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -193,8 +190,8 @@ struct FindDbTest : test_driver
                                                1,
                                                &ret_algo_count,
                                                perf,
-                                               workspace_dev.get(),
-                                               workspace_size,
+                                               wspace.ptr(),
+                                               wspace.size(),
                                                false);
         };
 
diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp
index 9a2876b3f0..04f3ccbf19 100644
--- a/test/gtest/conv_api_strided_tensors.cpp
+++ b/test/gtest/conv_api_strided_tensors.cpp
@@ -29,6 +29,7 @@
 #include <gtest/gtest.h>
 #include <miopen/miopen.h>
 #include "platform.hpp"
+#include "../workspace.hpp"
 
 #define MIOPEN_CHECK_RET(val) ASSERT_EQ(val, miopenStatusSuccess)
 
@@ -66,10 +67,12 @@ class ConvStridedTensors : public ::testing::Test
         MIOPEN_CHECK_RET(miopenSetConvolutionGroupCount(conv_descr, 1));
 
         // Workspace
+        size_t sz = 0;
         MIOPEN_CHECK_RET(miopenConvolutionForwardGetWorkSpaceSize(
-            handle, filter_descr, input_descr, conv_descr, output_descr, &workspace_size));
+            handle, filter_descr, input_descr, conv_descr, output_descr, &sz));
 
         // Data
+        wspace.resize(sz);
         h_input.resize(input_size);
         h_filter.resize(filter_size);
         h_output.resize(output_size);
@@ -125,7 +128,7 @@ class ConvStridedTensors : public ::testing::Test
     std::vector<int> dilation                = {1, 1, 1};
 
     // Workspace
-    size_t workspace_size;
+    Workspace wspace{};
 
     // Data
     const size_t input_size   = input_dims[0] * input_strides[0];
@@ -143,7 +146,6 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
 {
     auto device = Device(handle);
 
-    auto d_workspace = device.Malloc(workspace_size);
     auto d_input     = device.Malloc(input_bytes);
     auto d_filter    = device.Malloc(filter_bytes);
     auto d_output    = device.Malloc(output_bytes);
@@ -168,8 +170,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
                                                     sizeof(perf_results) / sizeof(perf_results[0]),
                                                     &perf_results_count,
                                                     perf_results,
-                                                    d_workspace.Data(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     true),
               miopenStatusSuccess);
     ASSERT_GT(perf_results_count, 0);
@@ -189,8 +191,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
                                        &beta,
                                        output_descr,
                                        d_output.Data(),
-                                       d_workspace.Data(),
-                                       workspace_size),
+                                       wspace.ptr(),
+                                       wspace.size()),
               miopenStatusSuccess);
     ASSERT_TRUE(device.Synchronize());
 }
diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp
index cb55a5951b..988368e3c0 100644
--- a/test/gtest/solver_bwd.hpp
+++ b/test/gtest/solver_bwd.hpp
@@ -40,6 +40,7 @@
 #include "conv_tensor_gen.hpp"
 
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvBwdSolverTest
@@ -76,14 +77,13 @@ struct ConvBwdSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
         const auto invoke_params =
             miopen::conv::DataInvokeParams{tensors,
-                                           workspace_dev.get(),
-                                           workspace_size,
+                                           wspace.ptr(),
+                                           wspace.size(),
                                            conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
@@ -177,8 +177,7 @@ struct ConvBwdSolverTest
     miopen::Allocator::ManageDataPtr in_dev;
     miopen::Allocator::ManageDataPtr wei_dev;
     miopen::Allocator::ManageDataPtr out_dev;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
 };
diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp
index 88fa9a9c55..a286cc0bbb 100644
--- a/test/gtest/solver_fwd.hpp
+++ b/test/gtest/solver_fwd.hpp
@@ -38,6 +38,7 @@
 
 #include "conv_test_base.hpp"
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvFwdSolverTest
@@ -77,14 +78,13 @@ struct ConvFwdSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
         const auto invoke_params =
             miopen::conv::DataInvokeParams{tensors,
-                                           workspace_dev.get(),
-                                           workspace_size,
+                                           wspace.ptr(),
+                                           wspace.size(),
                                            this->conv_desc.attribute.gfx90aFp16alt.GetFwd()};
 
         // auto sol = solv.GetSolution(ctx, problem);
@@ -116,8 +116,7 @@ struct ConvFwdSolverTest
     }
 
     ConvTestCase conv_config;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
     miopenTensorLayout_t tensor_layout;
diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp
index dde92e2071..097c9c6122 100644
--- a/test/gtest/solver_wrw.hpp
+++ b/test/gtest/solver_wrw.hpp
@@ -40,6 +40,7 @@
 #include "conv_tensor_gen.hpp"
 
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvWrwSolverTest
@@ -76,14 +77,13 @@ struct ConvWrwSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
         const auto invoke_params =
             miopen::conv::WrWInvokeParams{tensors,
-                                          workspace_dev.get(),
-                                          workspace_size,
+                                          wspace.ptr(),
+                                          wspace.size(),
                                           conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
@@ -179,8 +179,7 @@ struct ConvWrwSolverTest
     miopen::Allocator::ManageDataPtr in_dev;
     miopen::Allocator::ManageDataPtr wei_dev;
     miopen::Allocator::ManageDataPtr out_dev;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
 };
diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp
index 9058d34321..f5e60f5163 100644
--- a/test/pooling_common.hpp
+++ b/test/pooling_common.hpp
@@ -46,6 +46,7 @@
 #include "tensor_holder.hpp"
 #include "verify.hpp"
 #include "cpu_conv.hpp"
+#include "workspace.hpp"
 
 #define TEST_PADDING_MODE 0
 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
@@ -201,7 +202,8 @@ struct verify_forward_pooling
 
         auto in_dev        = handle.Write(input.data);
         auto out_dev       = handle.Create<T>(out.data.size());
-        auto workspace_dev = handle.Write(indices);
+        Workspace wspace{};
+        wspace.Write(indices);
 
         float alpha = 1, beta = 0;
         filter.Forward(handle,
@@ -212,10 +214,10 @@ struct verify_forward_pooling
                        out.desc,
                        out_dev.get(),
                        true,
-                       workspace_dev.get(),
-                       indices.size() * sizeof(Index));
+                       wspace.ptr(),
+                       wspace.size());
 
-        indices  = handle.Read<Index>(workspace_dev, indices.size());
+        indices = wspace.Read<decltype(indices)>();
         out.data = handle.Read<T>(out_dev, out.data.size());
         return out;
     }
@@ -403,9 +405,8 @@ struct verify_backward_pooling
         auto out_dev  = handle.Write(out.data);
         auto din_dev  = handle.Create<T>(dinput.data.size());
 
-        // std::vector<char> workspace(filter.GetWorkSpaceSize(out.desc));
-        // auto workspace_dev = handle.Write(workspace);
-        auto workspace_dev = handle.Write(indices);
+        Workspace wspace{};
+        wspace.Write(indices);
 
         float alpha = 1, beta = 0;
         filter.Backward(handle,
@@ -423,7 +424,7 @@ struct verify_backward_pooling
                         // dx
                         dinput.desc,
                         din_dev.get(),
-                        workspace_dev.get());
+                        wspace.prt());
 
         dinput.data = handle.Read<T>(din_dev, dinput.data.size());
         return dinput;
diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp
index bb2aa17e07..fd4e48ce4f 100644
--- a/test/reduce_test.cpp
+++ b/test/reduce_test.cpp
@@ -27,6 +27,7 @@
 #include "driver.hpp"
 #include "test.hpp"
 #include "verify.hpp"
+#include "workspace.hpp"
 #include "get_handle.hpp"
 #include "tensor_holder.hpp"
 #include "random.hpp"
@@ -60,7 +61,7 @@ struct verify_reduce_with_indices
     miopenReduceTensorIndices_t indicesOpt;
     miopenIndicesType_t indicesType;
 
-    verify_reduce_with_indices( // NOLINT (hicpp-member-init)
+    verify_reduce_with_indices(
         const miopen::ReduceTensorDescriptor& reduce_,
         const tensor<T>& input_,
         const tensor<T>& output_,
@@ -68,20 +69,20 @@ struct verify_reduce_with_indices
         const tensor<int>& indices_,
         float alpha_,
         float beta_)
+      : 
+        reduce(reduce_),
+        input(input_),
+        output(output_),
+        workspace(workspace_),
+        indices(indices_),
+        alpha(alpha_),
+        beta(beta_),
+        reduceOp(reduce.reduceTensorOp_)
+        compTypeVal(reduce.reduceTensorCompType_),
+        nanOpt     (reduce.reduceTensorNanOpt_),
+        indicesOpt (reduce.reduceTensorIndices_),
+        indicesType(reduce.reduceTensorIndicesType_)
     {
-        reduce    = reduce_;
-        input     = input_;
-        output    = output_;
-        workspace = workspace_;
-        indices   = indices_;
-        alpha     = alpha_;
-        beta      = beta_;
-
-        reduceOp    = reduce.reduceTensorOp_;
-        compTypeVal = reduce.reduceTensorCompType_;
-        nanOpt      = reduce.reduceTensorNanOpt_;
-        indicesOpt  = reduce.reduceTensorIndices_;
-        indicesType = reduce.reduceTensorIndicesType_;
     }
 
     tensor<float> cpu() const
@@ -339,10 +340,11 @@ struct verify_reduce_with_indices
         auto res         = output;
         auto res_indices = indices;
 
-        auto indices_dev = handle.Write(indices.data);
+        Workspace idxspace{};
+        idxspace.Write(indices.data);
 
-        std::size_t ws_sizeInBytes      = workspace.desc.GetElementSize() * sizeof(T);
-        std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int);
+        Workspace wspace{};
+        wspace.Write(workspace.data);
 
         const double alpha64 = alpha;
         const double beta64  = beta;
@@ -356,13 +358,11 @@ struct verify_reduce_with_indices
 
         if(ws_sizeInBytes > 0)
         {
-            auto workspace_dev = handle.Write(workspace.data);
-
             reduce.ReduceTensor(get_handle(),
                                 indices_dev.get(),
                                 indices_sizeInBytes,
-                                workspace_dev.get(),
-                                ws_sizeInBytes,
+                                wspace.ptr(),
+                                wspace.size(),
                                 alphaPtr,
                                 input.desc,
                                 input_dev.get(),
@@ -373,8 +373,8 @@ struct verify_reduce_with_indices
         else
         {
             reduce.ReduceTensor(get_handle(),
-                                indices_dev.get(),
-                                indices_sizeInBytes,
+                                idxspace.ptr(),
+                                idxspace.size(),
                                 nullptr,
                                 0,
                                 alphaPtr,
@@ -386,7 +386,7 @@ struct verify_reduce_with_indices
         };
 
         res.data         = handle.Read<T>(output_dev, res.data.size());
-        res_indices.data = handle.Read<int>(indices_dev, res_indices.data.size());
+        res_indices.data = idxspace.Read<decltype(res_indices.data)>();
 
         return (std::make_tuple(res, res_indices));
     }
@@ -639,7 +639,8 @@ struct verify_reduce_no_indices
         // replicate
         auto res = output;
 
-        std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T);
+        Workspace wspace{};
+        wspace.Write(workspace.data);
 
         const double alpha64 = alpha;
         const double beta64  = beta;
@@ -653,13 +654,11 @@ struct verify_reduce_no_indices
 
         if(ws_sizeInBytes > 0)
         {
-            auto workspace_dev = handle.Write(workspace.data);
-
             reduce.ReduceTensor(get_handle(),
                                 nullptr,
                                 0,
-                                workspace_dev.get(),
-                                ws_sizeInBytes,
+                                wspace.ptr(),
+                                wspace.size(),
                                 alphaPtr,
                                 input.desc,
                                 input_dev.get(),
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index ef5c1088c3..4d8f6ff1d0 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -35,6 +35,7 @@
 #include "verify.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <cmath>
 #include <ctime>
@@ -1436,8 +1437,8 @@ struct verify_forward_infer_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        Workspace wspace{};
+        wspace.resize(workSpaceSize);
 
         auto input_dev = handle.Write(input);
 
@@ -1450,8 +1451,6 @@ struct verify_forward_infer_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev = handle.Write(workSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -1479,8 +1478,8 @@ struct verify_forward_infer_rnn
                                   ((nohy) ? nullptr : hy_dev.get()),
                                   &hiddenDesc,
                                   nullptr,
-                                  workSpace_dev.get(),
-                                  workSpaceSize);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_RNN_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -1711,8 +1710,10 @@ struct verify_forward_train_rnn
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
+        Workspace wspace{};
+        wspace.resize(workSpaceSize);
+        Workspace reserveSpace{};
+        reserveSpace.resize(reserveSpaceSize);
 
         auto input_dev = handle.Write(input);
 
@@ -1726,7 +1727,6 @@ struct verify_forward_train_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev    = handle.Write(workSpace);
         auto reserveSpace_dev = handle.Write(reserveSpace);
 
         std::vector<int> hlens(3, 0);
@@ -1756,10 +1756,10 @@ struct verify_forward_train_rnn
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  nullptr,
-                                 workSpace_dev.get(),
-                                 workSpaceSize,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 reserveSpace.ptr(),
+                                 reserveSpace.size());
 
 #if(MIO_RNN_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2002,14 +2002,15 @@ struct verify_backward_data_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        auto workSpace_dev = handle.Write(workSpace);
+        Workspace wspace{};
+        wspace.resize(workSpaceSize);
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
         auto yin_dev  = handle.Write(yin);
         auto dyin_dev = handle.Write(dy);
         // auto dhyin_dev        = handle.Write(dhy);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
         auto weights_dev      = handle.Write(weights);
         // auto hx_dev           = handle.Write(initHidden);
 
@@ -2054,15 +2055,15 @@ struct verify_backward_data_rnn
                               ((nodhx) ? nullptr : dhx_dev.get()),
                               &hiddenDesc,
                               nullptr,
-                              workSpace_dev.get(),
-                              workSpaceSize,
-                              reserveSpace_dev.get(),
-                              reserveSpace.size() * sizeof(T));
+                              wspace.ptr(),
+                              wspace.size(),
+                              rspace.ptr(),
+                              rspace.size());
 
         auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                       (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
-                                      handle.Read<T>(reserveSpace_dev, reserveSpace.size()),
-                                      handle.Read<T>(workSpace_dev, workSpace.size()));
+                                      rspace.Read<std::vector<T>>(),
+                                      wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -2257,8 +2258,11 @@ struct verify_backward_weights_rnn
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace wspace{};
+        wpace.Write(workSpace);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+
         std::vector<T> dweights(weightSize);
         auto dweights_dev = handle.Write(dweights);
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -2283,10 +2287,10 @@ struct verify_backward_weights_rnn
                                  dy_dev.get(),
                                  &weightDesc,
                                  dweights_dev.get(),
-                                 workSpace_dev.get(),
-                                 workSpace.size() * sizeof(T),
-                                 reserveSpace_dev.get(),
-                                 reserveSpace.size() * sizeof(T));
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
diff --git a/test/workspace.hpp b/test/workspace.hpp
new file mode 100644
index 0000000000..37137918a7
--- /dev/null
+++ b/test/workspace.hpp
@@ -0,0 +1,149 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#include <hip/hip_runtime.h>
+
+#include "get_handle.hpp"
+
+class Workspace {
+
+  // RAII class for hip allocations
+  class GPUBuffer {
+  public:
+    GPUBuffer() = default;
+
+    explicit GPUBuffer(size_t num_bytes) {
+      assert(num_bytes > 0);
+      auto s = hipMalloc(&buf_, num_bytes);
+      if (s != hipSuccess || !buf_) {
+        std::abort();
+      }
+    }
+
+    ~GPUBuffer() {
+      auto s = hipFree(buf_);
+      buf_ = nullptr;
+      if (s != hipSuccess) {
+        std::abort();
+      }
+    }
+
+    void* ptr() { return buf_; }
+    void* ptr() const { return buf_; }
+
+    GPUBuffer(const GPUBuffer&) = delete;
+    GPUBuffer& operator = (const GPUBuffer&) = delete;
+
+    GPUBuffer(GPUBuffer&& that): 
+      buf_(std::move(that.buf_)) {
+        that.buf_ = nullptr; // take over ownership
+      }
+
+    GPUBuffer& operator = (GPUBuffer&& that) {
+      std::swap(this->buf_, that.buf_);
+      return *this;
+    }
+
+  private:
+    void* buf_ = nullptr;
+  }; 
+
+public:
+  Workspace() = default;
+  
+  Workspace(const Workspace&) = delete;
+  Workspace& operator = (const Workspace&) = delete;
+  Workspace(Workspace&&) = default;
+  Workspace& operator = (Workspace&&) = default;
+
+  size_t size() const { return sz_; }
+
+  void resize(size_t sz_in_bytes) {
+    sz_ = sz_in_bytes;
+    AdjustToSize();
+  }
+
+
+  // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
+  // passed to capture the size. Must call AdjustToSize() after calling such a method
+  size_t* SizePtr() { return &sz_; }
+
+#if 0
+  auto ptr() const { return data_.get(); }
+  auto ptr() { return data_.get(); }
+  void AdjustToSize() {
+    if (sz_ == 0) {
+      data_.reset();
+    } else {
+      data_ = handle_.Create(sz_);
+    }
+  }
+#else
+  auto ptr() const { return gpu_buf_.ptr(); }
+
+  auto ptr() { return gpu_buf_.ptr(); }
+
+  void AdjustToSize() {
+    if (sz_ != 0) {
+      gpu_buf_ = GPUBuffer(sz_);
+    } else {
+      gpu_buf_ = GPUBuffer();
+    }
+  }
+#endif
+
+  template <typename V>
+  void Write(const V& vec) {
+    using T = typename V::value_type;
+    auto bytes = vec.size() * sizeof(T);
+    resize(bytes);
+    auto s = hipMemcpyHostToDevice(this->ptr(), &vec[0], bytes);
+    if(s != hipSuccess) {
+      abort();
+    }
+  }
+
+  template <typename V>
+  V Read() const {
+    using T = typename V::value_type;
+    size_t num_elem = size() / sizeof(T);
+    V ret(num_elem);
+    auto s = hipMemcpyDeviceToHost(&ret[0], ptr(), size());
+    if (s != hipSuccess) {
+      abort();
+    }
+    return ret;
+  }
+
+private:
+  // miopen::Handle& handle_;
+  // miopen::Allocator::ManageDataPtr data_{};
+  GPUBuffer gpu_buf_{};
+  size_t sz_{};
+
+
+};

From 4b3307912512433d66417cc28b6b91c13bf22128 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 03:58:42 +0000
Subject: [PATCH 05/14] added an abstraction for workspace

---
 test/conv_common.hpp        | 131 +++++++++++++--------------------
 test/find_2_conv.cpp        |   2 -
 test/find_db.cpp            |   1 +
 test/gru_common.hpp         |  68 ++++++++++--------
 test/lstm_common.hpp        | 140 +++++++++++++++++++-----------------
 test/main.cpp               | 108 ++++++----------------------
 test/pooling_common.hpp     |   4 +-
 test/reduce_test.cpp        |  10 +--
 test/rnn_vanilla_common.hpp |  13 ++--
 test/tensor_reorder.cpp     |  74 +++++--------------
 test/workspace.hpp          |   9 +--
 11 files changed, 219 insertions(+), 341 deletions(-)

diff --git a/test/conv_common.hpp b/test/conv_common.hpp
index 58904c2b65..5fd71e2f2a 100644
--- a/test/conv_common.hpp
+++ b/test/conv_common.hpp
@@ -623,8 +623,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 {
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    wspace.resize(workspace_size);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     filter.FindConvBwdDataAlgorithm(handle,
                                                     input.desc,
@@ -689,8 +688,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 {
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    wspace.resize(workspace_size);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     filter.FindConvFwdAlgorithm(handle,
                                                 input.desc,
@@ -802,8 +800,7 @@ struct verify_forward_conv : conv_base<T, Tout>
 
                 if(api == ConvApi::Find_1_0)
                 {
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    wspace.resize(workspace_size);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -871,8 +868,7 @@ struct verify_forward_conv : conv_base<T, Tout>
             {
                 if(api == ConvApi::Find_1_0)
                 {
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    wspace.resize(workspace_size);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -1080,6 +1076,8 @@ struct verify_backward_conv : conv_base<T>
         auto wei_dev = handle.Write(weights.data);
         auto in_dev  = handle.Write(rinput.data);
 
+        Workspace wspace{};
+
         miopenConvSolution_t selected;
         bool fallback_path_taken = false;
         std::size_t count        = 0;
@@ -1095,8 +1093,7 @@ struct verify_backward_conv : conv_base<T>
         switch(api)
         {
         case ConvApi::Immediate: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            wspace.resize(workspace_size);
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1144,18 +1141,14 @@ struct verify_backward_conv : conv_base<T>
                 });
                 selected = std::move(solutions.front());
 
-                std::size_t ws_size;
-
-                ws_size = filter.GetForwardSolutionWorkspaceSize(
+                [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize(
                     handle, weights.desc, out.desc, rinput.desc, selected.solution_id);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
-                workspace_dev.reset();
                 if(selected.workspace_size > 0)
                 {
-                    workspace.resize(selected.workspace_size);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(selected.workspace_size);
                 }
 
                 filter.ConvolutionForwardImmediate(handle,
@@ -1165,8 +1158,8 @@ struct verify_backward_conv : conv_base<T>
                                                    out_dev.get(),
                                                    rinput.desc,
                                                    in_dev.get(),
-                                                   workspace_dev.get(),
-                                                   ws_size,
+                                                   wspace.ptr(),
+                                                   wspace.size(),
                                                    selected.solution_id);
             }
             else
@@ -1183,8 +1176,8 @@ struct verify_backward_conv : conv_base<T>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    workspace_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
@@ -1211,18 +1204,14 @@ struct verify_backward_conv : conv_base<T>
                 });
                 selected = std::move(solutions.front());
 
-                std::size_t ws_size;
-
-                ws_size = filter.GetBackwardSolutionWorkspaceSize(
+                [[maybe_unused]] std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize(
                     handle, out.desc, weights.desc, rinput.desc, selected.solution_id);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
-                workspace_dev.reset();
                 if(selected.workspace_size > 0)
                 {
-                    workspace.resize(selected.workspace_size);
-                    workspace_dev = handle.Write(workspace);
+                  wspace.resize(selected.workspace_size);
                 }
 
                 filter.ConvolutionBackwardImmediate(handle,
@@ -1232,16 +1221,14 @@ struct verify_backward_conv : conv_base<T>
                                                     wei_dev.get(),
                                                     rinput.desc,
                                                     in_dev.get(),
-                                                    workspace_dev.get(),
-                                                    ws_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     selected.solution_id);
             }
             break;
         }
         case ConvApi::Find_1_0: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1260,15 +1247,13 @@ struct verify_backward_conv : conv_base<T>
                                             1,
                                             &ret_algo_count,
                                             &perf,
-                                            workspace_dev.get(),
-                                            workspace_size,
+                                            wspace.ptr(),
+                                            wspace.size(),
                                             search);
 
-                workspace_dev.reset();
                 if(perf.memory > 0)
                 {
-                    workspace.resize(perf.memory);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(perf.memory);
                 }
 
                 filter.ConvolutionForward(handle,
@@ -1281,8 +1266,8 @@ struct verify_backward_conv : conv_base<T>
                                           &beta,
                                           rinput.desc,
                                           in_dev.get(),
-                                          workspace_dev.get(),
-                                          workspace_size);
+                                          wspace.ptr(),
+                                          wspace.size());
             }
             else
             {
@@ -1296,15 +1281,13 @@ struct verify_backward_conv : conv_base<T>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
 
-                workspace_dev.reset();
                 if(perf.memory > 0)
                 {
-                    workspace.resize(perf.memory);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(perf.memory);
                 }
 
                 filter.ConvolutionBackwardData(handle,
@@ -1317,8 +1300,8 @@ struct verify_backward_conv : conv_base<T>
                                                &beta,
                                                rinput.desc,
                                                in_dev.get(),
-                                               workspace_dev.get(),
-                                               workspace_size);
+                                               wspace.ptr(),
+                                               wspace.size());
             }
 
             /// \ref read_solver_name
@@ -1448,6 +1431,7 @@ struct verify_backward_weights_conv : conv_base<T>
         auto out_dev = handle.Write(out.data);
         auto wei_dev = handle.Write(rweights.data);
         auto in_dev  = handle.Write(input.data);
+        Workspace wspace{};
 
         miopenConvSolution_t selected;
         bool fallback_path_taken = false;
@@ -1464,9 +1448,7 @@ struct verify_backward_weights_conv : conv_base<T>
         switch(api)
         {
         case ConvApi::Immediate: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1484,8 +1466,8 @@ struct verify_backward_weights_conv : conv_base<T>
                     1,
                     &ret_algo_count,
                     &perf,
-                    workspace_dev.get(),
-                    workspace_size,
+                    wspace.ptr(),
+                    wspace.size(),
                     search);
             }
 
@@ -1512,9 +1494,7 @@ struct verify_backward_weights_conv : conv_base<T>
             });
             selected = std::move(solutions.front());
 
-            std::size_t ws_size;
-
-            ws_size = filter.GetWrwSolutionWorkspaceSize(
+            [[maybe_unused]] std::size_t ws_size = filter.GetWrwSolutionWorkspaceSize(
                 handle,
                 filter.mode == miopenTranspose ? input.desc : out.desc,
                 filter.mode == miopenTranspose ? out.desc : input.desc,
@@ -1523,11 +1503,9 @@ struct verify_backward_weights_conv : conv_base<T>
 
             filter.CompileSolution(ctx, problem, selected.solution_id);
 
-            workspace_dev.reset();
             if(selected.workspace_size > 0)
             {
-                workspace.resize(selected.workspace_size);
-                workspace_dev = handle.Write(workspace);
+                wspace.resize(selected.workspace_size);
             }
 
             filter.ConvolutionWrwImmediate(
@@ -1538,16 +1516,14 @@ struct verify_backward_weights_conv : conv_base<T>
                 filter.mode == miopenTranspose ? out_dev.get() : in_dev.get(),
                 rweights.desc,
                 wei_dev.get(),
-                workspace_dev.get(),
-                ws_size,
+                wspace.ptr(),
+                wspace.size(),
                 selected.solution_id);
             break;
         }
         case ConvApi::Find_1_0: {
 
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1564,15 +1540,13 @@ struct verify_backward_weights_conv : conv_base<T>
                 1,
                 &ret_algo_count,
                 &perf,
-                workspace_dev.get(),
-                workspace_size,
+                wspace.ptr(),
+                wspace.size(),
                 search);
 
-            workspace_dev.reset();
             if(perf.memory > 0)
             {
-                workspace.resize(perf.memory);
-                workspace_dev = handle.Write(workspace);
+                wspace.resize(perf.memory);
             }
 
             filter.ConvolutionBackwardWeights(
@@ -1586,8 +1560,8 @@ struct verify_backward_weights_conv : conv_base<T>
                 &beta,
                 rweights.desc,
                 wei_dev.get(),
-                workspace_dev.get(),
-                workspace_size);
+                wspace.ptr(),
+                wspace.size());
 
             /// \ref read_solver_name
             const auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken);
@@ -1739,9 +1713,8 @@ struct verify_forward_conv_int8 : conv_base<T>
                                     wei_vpad_dev.get());
         }
 
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-        std::vector<char> workspace(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{};
+        wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
         int ret_algo_count;
         miopenConvAlgoPerf_t perf;
@@ -1758,8 +1731,8 @@ struct verify_forward_conv_int8 : conv_base<T>
                                         1,
                                         &ret_algo_count,
                                         &perf,
-                                        workspace_dev.get(),
-                                        workspace_size,
+                                        wspace.ptr(),
+                                        wspace.size(),
                                         search);
         }
 
@@ -1787,9 +1760,7 @@ struct verify_forward_conv_int8 : conv_base<T>
         });
         auto selected = std::move(solutions.front());
 
-        std::size_t ws_size;
-
-        ws_size =
+        [[maybe_unused]] std::size_t ws_size =
             filter.GetForwardSolutionWorkspaceSize(handle,
                                                    (is_transform ? weight_vpad_desc : weights.desc),
                                                    (is_transform ? input_vpad_desc : input.desc),
@@ -1798,11 +1769,9 @@ struct verify_forward_conv_int8 : conv_base<T>
 
         filter.CompileSolution(ctx, problem, selected.solution_id);
 
-        workspace_dev.reset();
         if(selected.workspace_size > 0)
         {
-            workspace.resize(selected.workspace_size);
-            workspace_dev = handle.Write(workspace);
+            wspace.resize(selected.workspace_size);
         }
 
         filter.ConvolutionForwardImmediate(handle,
@@ -1812,8 +1781,8 @@ struct verify_forward_conv_int8 : conv_base<T>
                                            (is_transform ? in_vpad_dev.get() : in_dev.get()),
                                            rout.desc,
                                            out_dev.get(),
-                                           workspace_dev.get(),
-                                           ws_size,
+                                           wspace.ptr(),
+                                           wspace.size(),
                                            selected.solution_id);
 
         if(count != 0)
diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp
index 9148436ec7..8ed26211e3 100644
--- a/test/find_2_conv.cpp
+++ b/test/find_2_conv.cpp
@@ -312,8 +312,6 @@ struct Find2Test : test_driver
     {
         std::cerr << "Running a solution..." << std::endl;
 
-        auto& handle_deref = get_handle();
-
         std::size_t workspace_size;
         EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size),
                      miopenStatusSuccess);
diff --git a/test/find_db.cpp b/test/find_db.cpp
index 974136bc59..3977f9e220 100644
--- a/test/find_db.cpp
+++ b/test/find_db.cpp
@@ -27,6 +27,7 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "get_handle.hpp"
+#include "workspace.hpp"
 
 #include <miopen/convolution.hpp>
 #include <miopen/conv/problem_description.hpp>
diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 51e3c09012..736ff96dfc 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -35,6 +35,7 @@
 #include "verify.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <cmath>
 #include <ctime>
@@ -1964,7 +1965,7 @@ struct verify_forward_infer_gru
         auto&& handle = get_handle();
 
         size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
+        size_t workspace_size = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -1979,9 +1980,9 @@ struct verify_forward_infer_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        std::vector<T> workSpace(workspace_size / sizeof(T));
 
         auto input_dev = handle.Write(input);
 
@@ -2028,7 +2029,7 @@ struct verify_forward_infer_gru
                                   &hiddenDesc,
                                   nullptr,
                                   workSpace_dev.get(),
-                                  workSpaceSize);
+                                  workspace_size);
 
 #if(MIO_GRU_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2249,7 +2250,7 @@ struct verify_forward_train_gru
         auto&& handle = get_handle();
 
         size_t out_sz           = 0;
-        size_t workSpaceSize    = 0;
+        size_t workspace_size    = 0;
         size_t reserveSpaceSize = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
@@ -2265,11 +2266,11 @@ struct verify_forward_train_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        std::vector<T> workSpace(workspace_size / sizeof(T));
         std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
 
         auto input_dev = handle.Write(input);
@@ -2319,7 +2320,7 @@ struct verify_forward_train_gru
                                  &hiddenDesc,
                                  nullptr,
                                  workSpace_dev.get(),
-                                 workSpaceSize,
+                                 workspace_size,
                                  reserveSpace_dev.get(),
                                  reserveSpaceSize);
 
@@ -2484,7 +2485,7 @@ struct verify_backward_data_gru
         int bi        = dirMode != 0 ? 2 : 1;
         int hy_h      = hiddenSize;
         int bi_stride = bi * hy_h;
-        size_t workSpaceSize;
+        size_t workspace_size;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -2494,8 +2495,8 @@ struct verify_backward_data_gru
         // Outputs ----------
         size_t in_sz = 0;
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, inputDescs.data(), &in_sz);
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        std::vector<T> workSpace(workspace_size / sizeof(T));
         std::vector<T> dx(in_sz / sizeof(T));
         std::vector<T> dhx(initHidden.size());
 
@@ -2563,7 +2564,6 @@ struct verify_backward_data_gru
         auto&& handle = get_handle();
 
         size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -2578,16 +2578,19 @@ struct verify_backward_data_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        auto workSpace_dev = handle.Write(workSpace);
+        size_t workspace_size = 0;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
         auto yin_dev          = handle.Write(yin);
         auto dyin_dev         = handle.Write(dy);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
         auto weights_dev      = handle.Write(weights);
 
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -2633,15 +2636,15 @@ struct verify_backward_data_gru
                               ((nodhx) ? nullptr : dhx_dev.get()),
                               &hiddenDesc,
                               nullptr,
-                              workSpace_dev.get(),
-                              workSpaceSize,
-                              reserveSpace_dev.get(),
-                              reserveSpace.size() * sizeof(T));
+                              wspace.ptr(),
+                              wspace.size(),
+                              rspace.ptr(),
+                              rspace.size());
 
         auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                       (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
-                                      handle.Read<T>(reserveSpace_dev, reserveSpace.size()),
-                                      handle.Read<T>(workSpace_dev, workSpace.size()));
+                                      rspace.Read<std::vector<T>>(),
+                                      wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -2840,8 +2843,11 @@ struct verify_backward_weights_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace wspace{};
+        wspace.Write(workSpace);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+
         std::vector<T> dweights(weightSize);
         auto dweights_dev = handle.Write(dweights);
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -2869,10 +2875,10 @@ struct verify_backward_weights_gru
                                  dy_dev.get(),
                                  &weightDesc,
                                  dweights_dev.get(),
-                                 workSpace_dev.get(),
-                                 workSpace.size() * sizeof(T),
-                                 reserveSpace_dev.get(),
-                                 reserveSpace.size() * sizeof(T));
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -3123,10 +3129,10 @@ struct gru_basic_driver : test_driver
         size_t reserveSpaceSize;
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-        size_t workSpaceSize;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + 2 * out_sz +
+        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + 2 * out_sz +
                            (in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) +
                             (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz)) *
                                sizeof(T);
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index 1f0d5f98cb..29f132c1e7 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -27,6 +27,7 @@
 #ifndef GUARD_MIOPEN_TEST_LSTM_COMMON_HPP
 #define GUARD_MIOPEN_TEST_LSTM_COMMON_HPP
 
+#include "workspace.hpp"
 #include "driver.hpp"
 #include "dropout_util.hpp"
 #include "get_handle.hpp"
@@ -96,6 +97,8 @@ struct verify_backward_data_lstm
     std::vector<T> initHidden; // HX
     std::vector<T> initCell;   // CX
     std::vector<T> weights;
+    std::vector<T>& RSVgpu;
+    std::vector<T>& RSVcpu;
     std::vector<int> batch_seq;
     int hiddenSize;
     int seqLength;
@@ -115,8 +118,6 @@ struct verify_backward_data_lstm
     bool nodcx;
     bool use_dropout;
     bool use_seqPadding;
-    typename std::vector<T>::iterator RSVgpu;
-    typename std::vector<T>::iterator RSVcpu;
 
     verify_backward_data_lstm(miopenRNNDescriptor_t pRD,
                               const std::vector<T>& py,
@@ -153,6 +154,8 @@ struct verify_backward_data_lstm
           initHidden(phx),
           initCell(pcx),
           weights(pW),
+          RSVgpu(pRSVgpu),
+          RSVcpu(pRSVcpu),
           batch_seq(pBS),
           hiddenSize(pHS),
           seqLength(pS),
@@ -171,9 +174,7 @@ struct verify_backward_data_lstm
           nodhx(pnodhx),
           nodcx(pnodcx),
           use_dropout(puse_dropout),
-          use_seqPadding(puse_seqPadding),
-          RSVgpu(pRSVgpu.begin()),
-          RSVcpu(pRSVcpu.begin())
+          use_seqPadding(puse_seqPadding)
     {
         if(!nohx)
             initHidden = phx; // this may be intentionally a nullptr
@@ -248,6 +249,8 @@ struct verify_backward_weights_lstm
     std::vector<T> input;      // Y
     std::vector<T> dy;         // dY
     std::vector<T> initHidden; // HX
+    std::vector<T> reserveSpace_gpu;
+    std::vector<T> reserveSpace_cpu;
     std::vector<T> workSpace;
     std::vector<int> batch_seq;
     int weightSize;
@@ -264,8 +267,6 @@ struct verify_backward_weights_lstm
     bool nohx;
     bool use_dropout;
     bool use_seqPadding;
-    typename std::vector<T> reserveSpace_gpu;
-    typename std::vector<T> reserveSpace_cpu;
 
     verify_backward_weights_lstm(miopenRNNDescriptor_t pRD,
                                  const std::vector<T>& px,
@@ -291,6 +292,8 @@ struct verify_backward_weights_lstm
         : input(px),
           dy(pdy),
           initHidden(phx),
+          reserveSpace_gpu(pRSVgpu),
+          reserveSpace_cpu(pRSVcpu),
           workSpace(pWS),
           batch_seq(pBS),
           weightSize(pW),
@@ -306,9 +309,7 @@ struct verify_backward_weights_lstm
           realHiddenSize(pHXZ),
           nohx(pnohx),
           use_dropout(puse_dropout),
-          use_seqPadding(puse_seqPadding),
-          reserveSpace_gpu(pRSVgpu),
-          reserveSpace_cpu(pRSVcpu)
+          use_seqPadding(puse_seqPadding)
     {
         if(!nohx)
             initHidden = phx; // this may be intentionally a nullptr
@@ -534,7 +535,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         auto&& handle = get_handle();
 
         size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -549,9 +549,11 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size = 0;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         auto input_dev = handle.Write(input);
 
@@ -565,8 +567,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         auto cy = initCell;
         std::fill(cy.begin(), cy.end(), 0.);
 
-        auto workSpace_dev = handle.Write(workSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -577,6 +577,9 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         wlen[0] = weights.size();
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen);
 
+        /// \todo: fix the handle.Write() calls below because they generate
+        /// temporary objects that may get destroyed before the
+        /// miopenRNNForwardInference call happens
         miopenRNNForwardInference(&handle,
                                   rnnDesc,
                                   seqLength,
@@ -594,8 +597,8 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
                                   ((nohy) ? nullptr : handle.Write(hy).get()),
                                   &hiddenDesc,
                                   ((nocy) ? nullptr : handle.Write(cy).get()),
-                                  workSpace_dev.get(),
-                                  workSpaceSize);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_LSTM_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -673,9 +676,10 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
     using verify_forward_lstm<T>::nocy;
     using verify_forward_lstm<T>::use_seqPadding;
 
+    std::vector<T>& RSVgpu;
+    std::vector<T>& RSVcpu;
+
     bool use_dropout;
-    typename std::vector<T>::iterator RSVgpu;
-    typename std::vector<T>::iterator RSVcpu;
 
     verify_forward_train_lstm(miopenRNNDescriptor_t pRD,
                               const std::vector<T>& px,
@@ -700,7 +704,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
                               const bool pnocy           = false,
                               const bool puse_dropout    = false,
                               const bool puse_seqPadding = false)
-        : RSVgpu(pRSVgpu.begin()), RSVcpu(pRSVcpu.begin())
+        : RSVgpu(pRSVgpu), RSVcpu(pRSVcpu)
     {
         input          = px;
         initHidden     = phx;
@@ -861,7 +865,10 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true);
         }
 
-        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu);
+        if (reserveSpace.size() != RSVcpu.size()) {
+          std::abort();
+        }
+        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin());
 
         auto retSet = std::make_tuple(
             output, (nohy ? initHidden : hiddenState), (nocy ? initCell : cellState));
@@ -908,17 +915,16 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
         std::fill(output.begin(), output.end(), static_cast<T>(0));
         auto output_dev = handle.Write(output);
 
-        size_t workSpaceSize    = 0;
+        size_t workspace_size    = 0;
         size_t reserveSpaceSize = 0;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace wspace{};
+        Workspace rspace{};
+        wspace.resize(workspace_size);
+        rspace.resize(reserveSpaceSize);
 
         auto weights_dev = handle.Write(weights);
 
@@ -957,10 +963,10 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  ((nocy) ? nullptr : cy_dev.get()),
-                                 workSpace_dev.get(),
-                                 workSpaceSize,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_LSTM_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -969,9 +975,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             printf("GPU outdata[%d]: %f\n", i, outdata[i]);
         }
 #endif
-        reserveSpace =
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu);
+        RSVgpu = rspace.Read<std::vector<T>>();
 
         std::vector<T> output_gpu = handle.Read<T>(output_dev, output.size());
 
@@ -1049,7 +1053,7 @@ verify_backward_data_lstm<T>::cpu() const
     int hy_h      = hiddenSize;
     int bi_stride = bi * hy_h;
     int out_h     = hiddenSize * ((dirMode != 0) ? 2 : 1);
-    size_t workSpaceSize;
+    size_t workspace_size;
 
     std::vector<miopen::TensorDescriptor> inputCPPDescs;
     std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -1066,8 +1070,8 @@ verify_backward_data_lstm<T>::cpu() const
                                       true,
                                       use_seqPadding);
 
-    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-    std::vector<T> workSpace(workSpaceSize / sizeof(T));
+    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+    std::vector<T> workSpace(workspace_size / sizeof(T));
     std::vector<T> dx(in_sz);
     std::vector<T> dhx(initHidden.size());
     std::vector<T> dcx(initHidden.size());
@@ -1086,8 +1090,7 @@ verify_backward_data_lstm<T>::cpu() const
         reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T);
     }
 
-    std::vector<T> reserveSpace(reserveSpaceSize);
-    std::copy(RSVcpu, RSVcpu + reserveSpaceSize, reserveSpace.begin());
+    std::vector<T> reserveSpace(RSVcpu.begin(), RSVcpu.begin() + reserveSpaceSize);
 
     std::vector<T> converted_dinput;
     std::vector<T> converted_output;
@@ -1119,7 +1122,7 @@ verify_backward_data_lstm<T>::cpu() const
         packed_doutput = &converted_doutput;
 
         // WA
-        wa_workSpace.resize(workSpaceSize / sizeof(T) - (packedXInSize + packedYOutSize));
+        wa_workSpace.resize(workspace_size / sizeof(T) - (packedXInSize + packedYOutSize));
         wa_shifted_workSpace = &wa_workSpace;
     }
     else
@@ -1186,7 +1189,7 @@ verify_backward_data_lstm<T>::cpu() const
                   workSpace.begin() + converted_doutput.size() + converted_dinput.size());
     }
 
-    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu);
+    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin());
 
     // TODO: remove workSpace
     auto retSet =
@@ -1210,7 +1213,6 @@ verify_backward_data_lstm<T>::gpu() const
 
     auto&& handle = get_handle();
 
-    size_t workSpaceSize = 0;
 
     std::vector<miopen::TensorDescriptor> inputCPPDescs;
     std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -1225,19 +1227,22 @@ verify_backward_data_lstm<T>::gpu() const
                           hiddenSize * ((dirMode != 0) ? 2 : 1),
                           miopen::deref(rnnDesc).dataType);
 
-    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-    std::vector<T> workSpace(workSpaceSize / sizeof(T));
-    auto workSpace_dev = handle.Write(workSpace);
+    size_t workspace_size = 0;
+    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+    Workspace wspace{};
+    wspace.resize(workspace_size);
 
-    size_t reserveSpaceSize;
+    size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-    std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-    std::copy(RSVgpu, RSVgpu + reserveSpace.size(), reserveSpace.begin());
+    if (reserveSpaceSize != RSVgpu.size()) {
+      std::abort();
+    }
+    Workspace rspace{};
+    rspace.Write(RSVgpu);
 
     auto yin_dev          = handle.Write(yin);
     auto dyin_dev         = handle.Write(dy);
-    auto reserveSpace_dev = handle.Write(reserveSpace);
     auto weights_dev      = handle.Write(weights);
 
     std::vector<int> hlens(3, 0);
@@ -1284,18 +1289,17 @@ verify_backward_data_lstm<T>::gpu() const
                           ((nodhx) ? nullptr : dhx_dev.get()),
                           &hiddenDesc,
                           ((nodcx) ? nullptr : dcx_dev.get()),
-                          workSpace_dev.get(),
-                          workSpaceSize,
-                          reserveSpace_dev.get(),
-                          reserveSpace.size() * sizeof(T));
+                          wspace.ptr(),
+                          wspace.size(),
+                          rspace.ptr(),
+                          rspace.size());
 
-    reserveSpace = handle.Read<T>(reserveSpace_dev, reserveSpace.size());
-    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu);
+    RSVgpu = rspace.Read<std::vector<T>>();
     // TODO: remove workSpace
     auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                   (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
                                   (nodcx ? initCell : handle.Read<T>(dcx_dev, dcx.size())),
-                                  handle.Read<T>(workSpace_dev, workSpace.size()));
+                                  wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
     auto t_end = std::chrono::high_resolution_clock::now();
@@ -1427,8 +1431,12 @@ std::vector<T> verify_backward_weights_lstm<T>::gpu() const
                           hiddenSize * ((dirMode != 0) ? 2 : 1),
                           miopen::deref(rnnDesc).dataType);
 
-    auto workSpace_dev    = handle.Write(workSpace);
-    auto reserveSpace_dev = handle.Write(reserveSpace_gpu);
+    Workspace wspace{};
+    wspace.Write(workSpace);
+
+    Workspace rspace{};
+    rspace.Write(reserveSpace_gpu);
+
     std::vector<T> dweights(weightSize);
     auto dweights_dev = handle.Write(dweights);
     miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -1452,10 +1460,10 @@ std::vector<T> verify_backward_weights_lstm<T>::gpu() const
                              dy_dev.get(),
                              &weightDesc,
                              dweights_dev.get(),
-                             workSpace_dev.get(),
-                             workSpace.size() * sizeof(T),
-                             reserveSpace_dev.get(),
-                             reserveSpace_gpu.size() * sizeof(T));
+                             wspace.ptr(),
+                             wspace.size(),
+                             rspace.ptr(),
+                             rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
     auto t_end = std::chrono::high_resolution_clock::now();
@@ -1732,10 +1740,10 @@ struct lstm_basic_driver : test_driver
                                            false,
                                            usePadding);
 
-        size_t workSpaceSize;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize +
+        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size +
                            (2 * out_sz + in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) +
                             (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz) + (nocx ? 0 : hx_sz) +
                             (nocy ? 0 : hx_sz) + (nodcx ? 0 : hx_sz) + (nodcy ? 0 : hx_sz)) *
diff --git a/test/main.cpp b/test/main.cpp
index 8d7f515f22..7a5d0cd5b0 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -23,8 +23,10 @@
  * SOFTWARE.
  *
  *******************************************************************************/
+#include "get_handle.hpp"
 #include "test.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <iterator>
 #include <memory>
@@ -41,16 +43,10 @@
 struct handle_fixture
 {
     miopenHandle_t handle{};
-#if MIOPEN_BACKEND_OPENCL
-    cl_command_queue q{};
-#endif
 
     handle_fixture()
     {
         miopenCreate(&handle);
-#if MIOPEN_BACKEND_OPENCL
-        miopenGetStream(handle, &q);
-#endif
     }
 
     ~handle_fixture() { miopenDestroy(handle); }
@@ -171,7 +167,6 @@ struct conv_forward : output_tensor_fixture
     {
         float alpha = 1, beta = 0;
 
-        // Setup OpenCL buffers
 
         int n, h, c, w;
         STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w));
@@ -186,15 +181,13 @@ struct conv_forward : output_tensor_fixture
         size_t sz_fwd_workspace;
         STATUS(miopenConvolutionForwardGetWorkSpaceSize(
             handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace));
-        // OCL fails to allocate zero workspace. Let's allocate small workspace instead to simplify
-        // subsequent code.
-        if(sz_fwd_workspace == 0)
-            sz_fwd_workspace = 256;
+
+        Workspace wspace{};
+        wspace.resize(sz_fwd_workspace);
 
         std::vector<float> in(sz_in);
         std::vector<float> wei(sz_wei);
         std::vector<float> out(sz_out);
-        std::vector<float> fwd_workspace(sz_fwd_workspace / 4);
 
         for(size_t i = 0; i < sz_in; i++)
         {
@@ -205,60 +198,16 @@ struct conv_forward : output_tensor_fixture
             wei[i] = prng::gen_A_to_B(-0.5f, 0.5f) * 0.001f;
         }
 
-#if MIOPEN_BACKEND_OPENCL
-
-        cl_context ctx;
-        clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, nullptr);
-
-        cl_int status  = CL_SUCCESS;
-        cl_mem in_dev  = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_in, nullptr, &status);
-        cl_mem wei_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_wei, nullptr, nullptr);
-        cl_mem out_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 4 * sz_out, nullptr, nullptr);
-        cl_mem fwd_workspace_dev =
-            clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz_fwd_workspace, nullptr, nullptr);
-
-        status =
-            clEnqueueWriteBuffer(q, in_dev, CL_TRUE, 0, 4 * sz_in, in.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(
-            q, wei_dev, CL_TRUE, 0, 4 * sz_wei, wei.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(
-            q, out_dev, CL_TRUE, 0, 4 * sz_out, out.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(q,
-                                       fwd_workspace_dev,
-                                       CL_TRUE,
-                                       0,
-                                       sz_fwd_workspace,
-                                       fwd_workspace.data(),
-                                       0,
-                                       nullptr,
-                                       nullptr);
-        EXPECT(status == CL_SUCCESS);
-
-#elif MIOPEN_BACKEND_HIP
-
-        void* in_dev;
-        void* wei_dev;
-        void* out_dev;
-        void* fwd_workspace_dev;
-
-        EXPECT(hipMalloc(&in_dev, 4 * sz_in) == hipSuccess);
-        EXPECT(hipMalloc(&wei_dev, 4 * sz_wei) == hipSuccess);
-        EXPECT(hipMalloc(&out_dev, 4 * sz_out) == hipSuccess);
-        EXPECT(hipMalloc(&fwd_workspace_dev, sz_fwd_workspace) == hipSuccess);
-
-        EXPECT(hipMemcpy(in_dev, in.data(), 4 * sz_in, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(wei_dev, wei.data(), 4 * sz_wei, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(out_dev, out.data(), 4 * sz_out, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(fwd_workspace_dev,
-                         fwd_workspace.data(),
-                         sz_fwd_workspace,
-                         hipMemcpyHostToDevice) == hipSuccess);
+        auto& mhand = get_handle();
+
+        auto in_dev = mhand.Write(in);
+        auto wei_dev = mhand.Write(wei);
+        auto out_dev = mhand.Write(out);
 
-#endif
         int value = 10;
-        STATUS(miopenSetTensor(handle, inputTensor, in_dev, &value));
+        STATUS(miopenSetTensor(handle, inputTensor, in_dev.get(), &value));
 
-        STATUS(miopenScaleTensor(handle, inputTensor, in_dev, &alpha));
+        STATUS(miopenScaleTensor(handle, inputTensor, in_dev.get(), &alpha));
 
         float time;
 
@@ -280,32 +229,32 @@ struct conv_forward : output_tensor_fixture
             STATUS(miopenFindConvolutionForwardAlgorithm(
                 used_handle,
                 inputTensor,
-                in_dev,
+                in_dev.get(),
                 convFilter,
-                wei_dev,
+                wei_dev.get(),
                 convDesc,
                 outputTensor,
-                out_dev,
+                out_dev.get(),
                 1,
                 &ret_algo_count,
                 &perf,
-                fwd_workspace_dev,
-                sz_fwd_workspace,
+                wspace.ptr(),
+                wspace.size(),
                 0)); // MD: Not performing exhaustiveSearch by default for now
 
             STATUS(miopenConvolutionForward(used_handle,
                                             &alpha,
                                             inputTensor,
-                                            in_dev,
+                                            in_dev.get(),
                                             convFilter,
-                                            wei_dev,
+                                            wei_dev.get(),
                                             convDesc,
                                             perf.fwd_algo,
                                             &beta,
                                             outputTensor,
-                                            out_dev,
-                                            fwd_workspace_dev,
-                                            sz_fwd_workspace));
+                                            out_dev.get(),
+                                            wspace.ptr(),
+                                            wspace.size()));
 
             STATUS(miopenGetKernelTime(used_handle, &time));
 
@@ -323,19 +272,6 @@ struct conv_forward : output_tensor_fixture
             CHECK(time == 0.0);
         }
 
-// Potential memory leak free memory at end of function
-#if MIOPEN_BACKEND_OPENCL
-        clReleaseMemObject(in_dev);
-        clReleaseMemObject(wei_dev);
-        clReleaseMemObject(out_dev);
-        clReleaseMemObject(fwd_workspace_dev);
-
-#elif MIOPEN_BACKEND_HIP
-        hipFree(in_dev);
-        hipFree(wei_dev);
-        hipFree(out_dev);
-        hipFree(fwd_workspace_dev);
-#endif
     }
 };
 
diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp
index f5e60f5163..a98d9fb82d 100644
--- a/test/pooling_common.hpp
+++ b/test/pooling_common.hpp
@@ -217,7 +217,7 @@ struct verify_forward_pooling
                        wspace.ptr(),
                        wspace.size());
 
-        indices = wspace.Read<decltype(indices)>();
+        indices = wspace.Read<std::vector<Index>>();
         out.data = handle.Read<T>(out_dev, out.data.size());
         return out;
     }
@@ -424,7 +424,7 @@ struct verify_backward_pooling
                         // dx
                         dinput.desc,
                         din_dev.get(),
-                        wspace.prt());
+                        wspace.ptr());
 
         dinput.data = handle.Read<T>(din_dev, dinput.data.size());
         return dinput;
diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp
index fd4e48ce4f..a48d1fc9b8 100644
--- a/test/reduce_test.cpp
+++ b/test/reduce_test.cpp
@@ -77,7 +77,7 @@ struct verify_reduce_with_indices
         indices(indices_),
         alpha(alpha_),
         beta(beta_),
-        reduceOp(reduce.reduceTensorOp_)
+        reduceOp(reduce.reduceTensorOp_),
         compTypeVal(reduce.reduceTensorCompType_),
         nanOpt     (reduce.reduceTensorNanOpt_),
         indicesOpt (reduce.reduceTensorIndices_),
@@ -356,11 +356,11 @@ struct verify_reduce_with_indices
                                          ? static_cast<const void*>(&beta64)
                                          : static_cast<const void*>(&beta);
 
-        if(ws_sizeInBytes > 0)
+        if(wspace.size() > 0)
         {
             reduce.ReduceTensor(get_handle(),
-                                indices_dev.get(),
-                                indices_sizeInBytes,
+                                idxspace.ptr(),
+                                idxspace.size(),
                                 wspace.ptr(),
                                 wspace.size(),
                                 alphaPtr,
@@ -652,7 +652,7 @@ struct verify_reduce_no_indices
                                          ? static_cast<const void*>(&beta64)
                                          : static_cast<const void*>(&beta);
 
-        if(ws_sizeInBytes > 0)
+        if(wspace.size() > 0)
         {
             reduce.ReduceTensor(get_handle(),
                                 nullptr,
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index 4d8f6ff1d0..9a96cfb106 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -1712,8 +1712,8 @@ struct verify_forward_train_rnn
 
         Workspace wspace{};
         wspace.resize(workSpaceSize);
-        Workspace reserveSpace{};
-        reserveSpace.resize(reserveSpaceSize);
+        Workspace rspace{};
+        rspace.resize(reserveSpaceSize);
 
         auto input_dev = handle.Write(input);
 
@@ -1727,7 +1727,6 @@ struct verify_forward_train_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto reserveSpace_dev = handle.Write(reserveSpace);
 
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1);
@@ -1758,8 +1757,8 @@ struct verify_forward_train_rnn
                                  nullptr,
                                  wspace.ptr(),
                                  wspace.size(),
-                                 reserveSpace.ptr(),
-                                 reserveSpace.size());
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -1772,7 +1771,7 @@ struct verify_forward_train_rnn
         auto retSet = std::make_tuple(
             handle.Read<T>(output_dev, output.size()),
             (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)));
+            rspace.Read<std::vector<T>>());
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
 
@@ -2259,7 +2258,7 @@ struct verify_backward_weights_rnn
                               miopen::deref(rnnDesc).dataType);
 
         Workspace wspace{};
-        wpace.Write(workSpace);
+        wspace.Write(workSpace);
         Workspace rspace{};
         rspace.Write(reserveSpace);
 
diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp
index fcf3420320..c2ce8a94a9 100644
--- a/test/tensor_reorder.cpp
+++ b/test/tensor_reorder.cpp
@@ -38,6 +38,8 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "random.hpp"
+#include "get_handle.hpp"
+#include "workspace.hpp"
 
 template <>
 struct miopen_type<uint8_t> : std::integral_constant<miopenDataType_t, miopenInt8>
@@ -288,19 +290,6 @@ bool verify_tensor(tensor<T>& t_gpu, tensor<T>& t_cpu)
 
 struct tensor_reorder_base_driver : test_driver
 {
-    miopenHandle_t handle{};
-#if MIOPEN_BACKEND_OPENCL
-    cl_command_queue q{};
-#endif
-
-    tensor_reorder_base_driver()
-    {
-        miopenCreate(&handle);
-#if MIOPEN_BACKEND_OPENCL
-        miopenGetStream(handle, &q);
-#endif
-    }
-    ~tensor_reorder_base_driver() { miopenDestroy(handle); }
 
     static std::vector<uint32_t> get_dim_3_size() { return {1, 9}; }
     static std::vector<uint32_t> get_dim_2_size() { return {1, 9}; }
@@ -365,7 +354,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
     // NOLINTBEGIN(clang-analyzer-cplusplus.NewDeleteLeaks)
     void run()
     {
-        auto run_reorder = [this](uint32_t dim_0,
+        auto run_reorder = [](uint32_t dim_0,
                                   uint32_t dim_1,
                                   uint32_t dim_2,
                                   uint32_t dim_3,
@@ -394,8 +383,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
             tensor<T> t_dst_gpu(tensor_len, tensor_strides);
             rand_tensor_integer(t_src);
 
+            auto& handle = get_handle();
             miopen::ExecutionContext ctx;
-            ctx.SetStream(&miopen::deref(this->handle));
+            ctx.SetStream(&handle);
             // ctx.SetupFloats();
             auto reorder_sol = MakeTensorReorderAttributes(ctx,
                                                            to_miopen_data_type<T>::get(),
@@ -408,36 +398,15 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                                                            order_2,
                                                            order_3);
             EXPECT(reorder_sol != nullptr);
-            size_t workspace = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
+            size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
                                                           : reorder_sol->GetOutputTensorSize();
-#if MIOPEN_BACKEND_OPENCL
-            cl_context cl_ctx;
-            clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &cl_ctx, nullptr);
-            cl_int status = CL_SUCCESS;
-            cl_mem src_dev =
-                clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, sizeof(T) * tensor_sz, nullptr, &status);
-            cl_mem dst_dev = clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, workspace, nullptr, nullptr);
-            status |= clEnqueueWriteBuffer(q,
-                                           src_dev,
-                                           CL_TRUE,
-                                           0,
-                                           sizeof(T) * tensor_sz,
-                                           t_src.data.data(),
-                                           0,
-                                           nullptr,
-                                           nullptr);
-            EXPECT(status == CL_SUCCESS);
-#elif MIOPEN_BACKEND_HIP
-            void* src_dev;
-            void* dst_dev;
-            EXPECT(hipMalloc(&src_dev, sizeof(T) * tensor_sz) == hipSuccess);
-            EXPECT(hipMalloc(&dst_dev, workspace) == hipSuccess);
-            EXPECT(hipMemcpy(
-                       src_dev, t_src.data.data(), sizeof(T) * tensor_sz, hipMemcpyHostToDevice) ==
-                   hipSuccess);
-#endif
+            Workspace wspace{};
+            wspace.resize(workspace_size);
+
+            auto src_dev = handle.Write(t_src.data);
+
             const auto invoke_param = reorder_invoke_param{
-                DataCast(static_cast<const void*>(src_dev)), DataCast(dst_dev)};
+                src_dev.get(), wspace.ptr()};
             std::vector<OpKernelArg> opArgs = reorder_sol->GetKernelArg();
             boost::optional<miopen::InvokerFactory> invoker_factory(
                 [=](const std::vector<miopen::Kernel>& kernels) mutable {
@@ -454,9 +423,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
             std::vector<miopen::solver::KernelInfo> construction_params{
                 reorder_sol->GetKernelInfo()};
             const auto invoker =
-                miopen::deref(this->handle).PrepareInvoker(*invoker_factory, construction_params);
+                handle.PrepareInvoker(*invoker_factory, construction_params);
             // run gpu
-            invoker(miopen::deref(this->handle), invoke_param);
+            invoker(handle, invoke_param);
             // run cpu
             cpu_reorder<T>::run(t_dst.data.data(),
                                 t_src.data.data(),
@@ -469,18 +438,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                                 order_2,
                                 order_3);
             invoker_factory = boost::none;
-#if MIOPEN_BACKEND_OPENCL
-            status = clEnqueueReadBuffer(
-                q, dst_dev, CL_TRUE, 0, workspace, t_dst_gpu.data.data(), 0, nullptr, nullptr);
-            EXPECT(status == CL_SUCCESS);
-            clReleaseMemObject(dst_dev);
-            clReleaseMemObject(src_dev);
-#elif MIOPEN_BACKEND_HIP
-            EXPECT(hipMemcpy(t_dst_gpu.data.data(), dst_dev, workspace, hipMemcpyDeviceToHost) ==
-                   hipSuccess);
-            hipFree(dst_dev);
-            hipFree(src_dev);
-#endif
+
+            t_dst_gpu.data = wspace.Read<decltype(t_dst_gpu.data)>();
+
             // we expect excact match, since use integer
             bool valid_result = verify_tensor(t_dst_gpu, t_dst);
             std::cout << "[" << reorder_str::get(order_0, order_1, order_2, order_3) << ", b"
diff --git a/test/workspace.hpp b/test/workspace.hpp
index 37137918a7..adf62a95ed 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -119,9 +119,9 @@ class Workspace {
   template <typename V>
   void Write(const V& vec) {
     using T = typename V::value_type;
-    auto bytes = vec.size() * sizeof(T);
-    resize(bytes);
-    auto s = hipMemcpyHostToDevice(this->ptr(), &vec[0], bytes);
+    resize(vec.size() * sizeof(T));
+    auto s = hipMemcpy(this->ptr(), &vec[0], size(),
+        hipMemcpyHostToDevice);
     if(s != hipSuccess) {
       abort();
     }
@@ -132,7 +132,8 @@ class Workspace {
     using T = typename V::value_type;
     size_t num_elem = size() / sizeof(T);
     V ret(num_elem);
-    auto s = hipMemcpyDeviceToHost(&ret[0], ptr(), size());
+    auto s = hipMemcpy(&ret[0], ptr(), size(),
+        hipMemcpyDeviceToHost);
     if (s != hipSuccess) {
       abort();
     }

From cdb9325b0d3c7aeac2c719cf980a3d94f0c0ebd3 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 14:57:16 +0000
Subject: [PATCH 06/14] fix a check

---
 test/lstm_common.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index 29f132c1e7..78c639e5d3 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -1235,7 +1235,7 @@ verify_backward_data_lstm<T>::gpu() const
     size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-    if (reserveSpaceSize != RSVgpu.size()) {
+    if (reserveSpaceSize != (RSVgpu.size() * sizeof(T))) {
       std::abort();
     }
     Workspace rspace{};

From 71f709b0c71c36968495779b7c0c8150958e0eba Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 14:57:43 +0000
Subject: [PATCH 07/14] fix format

---
 test/conv_common.hpp                    |   8 +-
 test/find_2_conv.cpp                    |  13 +-
 test/gru_common.hpp                     |  12 +-
 test/gtest/conv_api_strided_tensors.cpp |   6 +-
 test/gtest/solver_bwd.hpp               |   7 +-
 test/gtest/solver_fwd.hpp               |   7 +-
 test/gtest/solver_wrw.hpp               |   7 +-
 test/lstm_common.hpp                    |  21 +--
 test/main.cpp                           |   9 +-
 test/pooling_common.hpp                 |   6 +-
 test/reduce_test.cpp                    |  40 +++--
 test/rnn_vanilla_common.hpp             |  10 +-
 test/tensor_reorder.cpp                 |  22 ++-
 test/workspace.hpp                      | 195 +++++++++++++-----------
 14 files changed, 177 insertions(+), 186 deletions(-)

diff --git a/test/conv_common.hpp b/test/conv_common.hpp
index 5fd71e2f2a..c2665c0072 100644
--- a/test/conv_common.hpp
+++ b/test/conv_common.hpp
@@ -324,9 +324,9 @@ struct conv_base
             Workspace wspace{};
             wspace.resize(workspace_size);
 
-            EXPECT_EQUAL(miopenStatusSuccess,
-                         miopenRunSolution(
-                             handle, solution, 3, arguments, wspace.ptr(), wspace.size()));
+            EXPECT_EQUAL(
+                miopenStatusSuccess,
+                miopenRunSolution(handle, solution, 3, arguments, wspace.ptr(), wspace.size()));
         }
 
         const auto& solution_deref = miopen::deref(solutions.front());
@@ -1211,7 +1211,7 @@ struct verify_backward_conv : conv_base<T>
 
                 if(selected.workspace_size > 0)
                 {
-                  wspace.resize(selected.workspace_size);
+                    wspace.resize(selected.workspace_size);
                 }
 
                 filter.ConvolutionBackwardImmediate(handle,
diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp
index 8ed26211e3..4e1433b1e5 100644
--- a/test/find_2_conv.cpp
+++ b/test/find_2_conv.cpp
@@ -214,9 +214,9 @@ struct Find2Test : test_driver
                 Workspace wspace{};
                 wspace.resize(workspace_size);
 
-                EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace(
-                                 options, wspace.ptr(), wspace.size()), 
-                             miopenStatusSuccess);
+                EXPECT_EQUAL(
+                    miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()),
+                    miopenStatusSuccess);
 
                 EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor(
                                  options, miopenTensorConvolutionX, x_dev.get()),
@@ -329,10 +329,9 @@ struct Find2Test : test_driver
                 arguments[i].buffer     = buffers[i];
             }
 
-            EXPECT_EQUAL(
-                miopenRunSolution(
-                    handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()),
-                miopenStatusSuccess);
+            EXPECT_EQUAL(miopenRunSolution(
+                             handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()),
+                         miopenStatusSuccess);
         };
 
         // Without descriptors
diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 736ff96dfc..89b5d4a18d 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -1964,7 +1964,7 @@ struct verify_forward_infer_gru
 #endif
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
+        size_t out_sz         = 0;
         size_t workspace_size = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
@@ -2250,7 +2250,7 @@ struct verify_forward_train_gru
         auto&& handle = get_handle();
 
         size_t out_sz           = 0;
-        size_t workspace_size    = 0;
+        size_t workspace_size   = 0;
         size_t reserveSpaceSize = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
@@ -2563,7 +2563,7 @@ struct verify_backward_data_gru
 
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
+        size_t out_sz = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -2584,9 +2584,9 @@ struct verify_backward_data_gru
         wspace.resize(workspace_size);
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
-        auto yin_dev          = handle.Write(yin);
-        auto dyin_dev         = handle.Write(dy);
-        auto weights_dev      = handle.Write(weights);
+        auto yin_dev     = handle.Write(yin);
+        auto dyin_dev    = handle.Write(dy);
+        auto weights_dev = handle.Write(weights);
 
         Workspace rspace{};
         rspace.Write(reserveSpace);
diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp
index 04f3ccbf19..d4e49201f4 100644
--- a/test/gtest/conv_api_strided_tensors.cpp
+++ b/test/gtest/conv_api_strided_tensors.cpp
@@ -146,9 +146,9 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
 {
     auto device = Device(handle);
 
-    auto d_input     = device.Malloc(input_bytes);
-    auto d_filter    = device.Malloc(filter_bytes);
-    auto d_output    = device.Malloc(output_bytes);
+    auto d_input  = device.Malloc(input_bytes);
+    auto d_filter = device.Malloc(filter_bytes);
+    auto d_output = device.Malloc(output_bytes);
 
     std::fill_n(h_input.begin(), h_input.size(), 1.f);
     ASSERT_TRUE(d_input.CopyToDevice(h_input.data(), input_bytes));
diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp
index 988368e3c0..6511800ce6 100644
--- a/test/gtest/solver_bwd.hpp
+++ b/test/gtest/solver_bwd.hpp
@@ -80,11 +80,8 @@ struct ConvBwdSolverTest
             wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::DataInvokeParams{tensors,
-                                           wspace.ptr(),
-                                           wspace.size(),
-                                           conv_desc.attribute.gfx90aFp16alt.GetBwd()};
+        const auto invoke_params = miopen::conv::DataInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
         ASSERT_TRUE(sol.Succeeded());
diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp
index a286cc0bbb..20b16fcc32 100644
--- a/test/gtest/solver_fwd.hpp
+++ b/test/gtest/solver_fwd.hpp
@@ -81,11 +81,8 @@ struct ConvFwdSolverTest
             wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::DataInvokeParams{tensors,
-                                           wspace.ptr(),
-                                           wspace.size(),
-                                           this->conv_desc.attribute.gfx90aFp16alt.GetFwd()};
+        const auto invoke_params = miopen::conv::DataInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()};
 
         // auto sol = solv.GetSolution(ctx, problem);
         // This is complicated due to the split between tunable and non-tunable solvers
diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp
index 097c9c6122..dcf8311d83 100644
--- a/test/gtest/solver_wrw.hpp
+++ b/test/gtest/solver_wrw.hpp
@@ -80,11 +80,8 @@ struct ConvWrwSolverTest
             wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::WrWInvokeParams{tensors,
-                                          wspace.ptr(),
-                                          wspace.size(),
-                                          conv_desc.attribute.gfx90aFp16alt.GetBwd()};
+        const auto invoke_params = miopen::conv::WrWInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
         ASSERT_TRUE(sol.Succeeded());
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index 78c639e5d3..be81761132 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -534,7 +534,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
 
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
+        size_t out_sz = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -865,8 +865,9 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true);
         }
 
-        if (reserveSpace.size() != RSVcpu.size()) {
-          std::abort();
+        if(reserveSpace.size() != RSVcpu.size())
+        {
+            std::abort();
         }
         std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin());
 
@@ -915,7 +916,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
         std::fill(output.begin(), output.end(), static_cast<T>(0));
         auto output_dev = handle.Write(output);
 
-        size_t workspace_size    = 0;
+        size_t workspace_size   = 0;
         size_t reserveSpaceSize = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
         miopenGetRNNTrainingReserveSize(
@@ -1213,7 +1214,6 @@ verify_backward_data_lstm<T>::gpu() const
 
     auto&& handle = get_handle();
 
-
     std::vector<miopen::TensorDescriptor> inputCPPDescs;
     std::vector<miopenTensorDescriptor_t> inputDescs;
     createTensorDescArray(
@@ -1235,15 +1235,16 @@ verify_backward_data_lstm<T>::gpu() const
     size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-    if (reserveSpaceSize != (RSVgpu.size() * sizeof(T))) {
-      std::abort();
+    if(reserveSpaceSize != (RSVgpu.size() * sizeof(T)))
+    {
+        std::abort();
     }
     Workspace rspace{};
     rspace.Write(RSVgpu);
 
-    auto yin_dev          = handle.Write(yin);
-    auto dyin_dev         = handle.Write(dy);
-    auto weights_dev      = handle.Write(weights);
+    auto yin_dev     = handle.Write(yin);
+    auto dyin_dev    = handle.Write(dy);
+    auto weights_dev = handle.Write(weights);
 
     std::vector<int> hlens(3, 0);
     hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
diff --git a/test/main.cpp b/test/main.cpp
index 7a5d0cd5b0..aeba6e6d23 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -44,10 +44,7 @@ struct handle_fixture
 {
     miopenHandle_t handle{};
 
-    handle_fixture()
-    {
-        miopenCreate(&handle);
-    }
+    handle_fixture() { miopenCreate(&handle); }
 
     ~handle_fixture() { miopenDestroy(handle); }
 };
@@ -167,7 +164,6 @@ struct conv_forward : output_tensor_fixture
     {
         float alpha = 1, beta = 0;
 
-
         int n, h, c, w;
         STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w));
         size_t sz_in = static_cast<size_t>(n) * c * h * w;
@@ -200,7 +196,7 @@ struct conv_forward : output_tensor_fixture
 
         auto& mhand = get_handle();
 
-        auto in_dev = mhand.Write(in);
+        auto in_dev  = mhand.Write(in);
         auto wei_dev = mhand.Write(wei);
         auto out_dev = mhand.Write(out);
 
@@ -271,7 +267,6 @@ struct conv_forward : output_tensor_fixture
         {
             CHECK(time == 0.0);
         }
-
     }
 };
 
diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp
index a98d9fb82d..6a05a57b40 100644
--- a/test/pooling_common.hpp
+++ b/test/pooling_common.hpp
@@ -200,8 +200,8 @@ struct verify_forward_pooling
         auto out      = get_output_tensor(filter, input);
         indices.resize(out.data.size(), 0);
 
-        auto in_dev        = handle.Write(input.data);
-        auto out_dev       = handle.Create<T>(out.data.size());
+        auto in_dev  = handle.Write(input.data);
+        auto out_dev = handle.Create<T>(out.data.size());
         Workspace wspace{};
         wspace.Write(indices);
 
@@ -217,7 +217,7 @@ struct verify_forward_pooling
                        wspace.ptr(),
                        wspace.size());
 
-        indices = wspace.Read<std::vector<Index>>();
+        indices  = wspace.Read<std::vector<Index>>();
         out.data = handle.Read<T>(out_dev, out.data.size());
         return out;
     }
diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp
index a48d1fc9b8..4c859d0256 100644
--- a/test/reduce_test.cpp
+++ b/test/reduce_test.cpp
@@ -61,27 +61,25 @@ struct verify_reduce_with_indices
     miopenReduceTensorIndices_t indicesOpt;
     miopenIndicesType_t indicesType;
 
-    verify_reduce_with_indices(
-        const miopen::ReduceTensorDescriptor& reduce_,
-        const tensor<T>& input_,
-        const tensor<T>& output_,
-        const tensor<T>& workspace_,
-        const tensor<int>& indices_,
-        float alpha_,
-        float beta_)
-      : 
-        reduce(reduce_),
-        input(input_),
-        output(output_),
-        workspace(workspace_),
-        indices(indices_),
-        alpha(alpha_),
-        beta(beta_),
-        reduceOp(reduce.reduceTensorOp_),
-        compTypeVal(reduce.reduceTensorCompType_),
-        nanOpt     (reduce.reduceTensorNanOpt_),
-        indicesOpt (reduce.reduceTensorIndices_),
-        indicesType(reduce.reduceTensorIndicesType_)
+    verify_reduce_with_indices(const miopen::ReduceTensorDescriptor& reduce_,
+                               const tensor<T>& input_,
+                               const tensor<T>& output_,
+                               const tensor<T>& workspace_,
+                               const tensor<int>& indices_,
+                               float alpha_,
+                               float beta_)
+        : reduce(reduce_),
+          input(input_),
+          output(output_),
+          workspace(workspace_),
+          indices(indices_),
+          alpha(alpha_),
+          beta(beta_),
+          reduceOp(reduce.reduceTensorOp_),
+          compTypeVal(reduce.reduceTensorCompType_),
+          nanOpt(reduce.reduceTensorNanOpt_),
+          indicesOpt(reduce.reduceTensorIndices_),
+          indicesType(reduce.reduceTensorIndicesType_)
     {
     }
 
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index 9a96cfb106..ce283bddde 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -1727,7 +1727,6 @@ struct verify_forward_train_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -1768,10 +1767,9 @@ struct verify_forward_train_rnn
         }
 #endif
 
-        auto retSet = std::make_tuple(
-            handle.Read<T>(output_dev, output.size()),
-            (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            rspace.Read<std::vector<T>>());
+        auto retSet = std::make_tuple(handle.Read<T>(output_dev, output.size()),
+                                      (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
+                                      rspace.Read<std::vector<T>>());
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
 
@@ -2010,7 +2008,7 @@ struct verify_backward_data_rnn
         // auto dhyin_dev        = handle.Write(dhy);
         Workspace rspace{};
         rspace.Write(reserveSpace);
-        auto weights_dev      = handle.Write(weights);
+        auto weights_dev = handle.Write(weights);
         // auto hx_dev           = handle.Write(initHidden);
 
         std::vector<int> hlens(3, 0);
diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp
index c2ce8a94a9..85210c94b9 100644
--- a/test/tensor_reorder.cpp
+++ b/test/tensor_reorder.cpp
@@ -355,13 +355,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
     void run()
     {
         auto run_reorder = [](uint32_t dim_0,
-                                  uint32_t dim_1,
-                                  uint32_t dim_2,
-                                  uint32_t dim_3,
-                                  uint32_t order_0,
-                                  uint32_t order_1,
-                                  uint32_t order_2,
-                                  uint32_t order_3) {
+                              uint32_t dim_1,
+                              uint32_t dim_2,
+                              uint32_t dim_3,
+                              uint32_t order_0,
+                              uint32_t order_1,
+                              uint32_t order_2,
+                              uint32_t order_3) {
             int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3;
             std::vector<int> tensor_len({static_cast<int>(dim_0),
                                          static_cast<int>(dim_1),
@@ -399,14 +399,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                                                            order_3);
             EXPECT(reorder_sol != nullptr);
             size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
-                                                          : reorder_sol->GetOutputTensorSize();
+                                                               : reorder_sol->GetOutputTensorSize();
             Workspace wspace{};
             wspace.resize(workspace_size);
 
             auto src_dev = handle.Write(t_src.data);
 
-            const auto invoke_param = reorder_invoke_param{
-                src_dev.get(), wspace.ptr()};
+            const auto invoke_param         = reorder_invoke_param{src_dev.get(), wspace.ptr()};
             std::vector<OpKernelArg> opArgs = reorder_sol->GetKernelArg();
             boost::optional<miopen::InvokerFactory> invoker_factory(
                 [=](const std::vector<miopen::Kernel>& kernels) mutable {
@@ -422,8 +421,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                 });
             std::vector<miopen::solver::KernelInfo> construction_params{
                 reorder_sol->GetKernelInfo()};
-            const auto invoker =
-                handle.PrepareInvoker(*invoker_factory, construction_params);
+            const auto invoker = handle.PrepareInvoker(*invoker_factory, construction_params);
             // run gpu
             invoker(handle, invoke_param);
             // run cpu
diff --git a/test/workspace.hpp b/test/workspace.hpp
index adf62a95ed..b60db33d7e 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -29,68 +29,75 @@
 
 #include "get_handle.hpp"
 
-class Workspace {
-
-  // RAII class for hip allocations
-  class GPUBuffer {
-  public:
-    GPUBuffer() = default;
-
-    explicit GPUBuffer(size_t num_bytes) {
-      assert(num_bytes > 0);
-      auto s = hipMalloc(&buf_, num_bytes);
-      if (s != hipSuccess || !buf_) {
-        std::abort();
-      }
-    }
-
-    ~GPUBuffer() {
-      auto s = hipFree(buf_);
-      buf_ = nullptr;
-      if (s != hipSuccess) {
-        std::abort();
-      }
-    }
+class Workspace
+{
+
+    // RAII class for hip allocations
+    class GPUBuffer
+    {
+    public:
+        GPUBuffer() = default;
+
+        explicit GPUBuffer(size_t num_bytes)
+        {
+            assert(num_bytes > 0);
+            auto s = hipMalloc(&buf_, num_bytes);
+            if(s != hipSuccess || !buf_)
+            {
+                std::abort();
+            }
+        }
+
+        ~GPUBuffer()
+        {
+            auto s = hipFree(buf_);
+            buf_   = nullptr;
+            if(s != hipSuccess)
+            {
+                std::abort();
+            }
+        }
+
+        void* ptr() { return buf_; }
+        void* ptr() const { return buf_; }
+
+        GPUBuffer(const GPUBuffer&) = delete;
+        GPUBuffer& operator=(const GPUBuffer&) = delete;
+
+        GPUBuffer(GPUBuffer&& that) : buf_(std::move(that.buf_))
+        {
+            that.buf_ = nullptr; // take over ownership
+        }
+
+        GPUBuffer& operator=(GPUBuffer&& that)
+        {
+            std::swap(this->buf_, that.buf_);
+            return *this;
+        }
+
+    private:
+        void* buf_ = nullptr;
+    };
 
-    void* ptr() { return buf_; }
-    void* ptr() const { return buf_; }
+public:
+    Workspace() = default;
 
-    GPUBuffer(const GPUBuffer&) = delete;
-    GPUBuffer& operator = (const GPUBuffer&) = delete;
+    Workspace(const Workspace&) = delete;
+    Workspace& operator=(const Workspace&) = delete;
+    Workspace(Workspace&&)                 = default;
+    Workspace& operator=(Workspace&&) = default;
 
-    GPUBuffer(GPUBuffer&& that): 
-      buf_(std::move(that.buf_)) {
-        that.buf_ = nullptr; // take over ownership
-      }
+    size_t size() const { return sz_; }
 
-    GPUBuffer& operator = (GPUBuffer&& that) {
-      std::swap(this->buf_, that.buf_);
-      return *this;
+    void resize(size_t sz_in_bytes)
+    {
+        sz_ = sz_in_bytes;
+        AdjustToSize();
     }
 
-  private:
-    void* buf_ = nullptr;
-  }; 
-
-public:
-  Workspace() = default;
-  
-  Workspace(const Workspace&) = delete;
-  Workspace& operator = (const Workspace&) = delete;
-  Workspace(Workspace&&) = default;
-  Workspace& operator = (Workspace&&) = default;
-
-  size_t size() const { return sz_; }
-
-  void resize(size_t sz_in_bytes) {
-    sz_ = sz_in_bytes;
-    AdjustToSize();
-  }
-
-
-  // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
-  // passed to capture the size. Must call AdjustToSize() after calling such a method
-  size_t* SizePtr() { return &sz_; }
+    // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
+    // passed to capture the size. Must call AdjustToSize() after calling such a method
+    size_t* SizePtr() { return &sz_; }
 
 #if 0
   auto ptr() const { return data_.get(); }
@@ -103,48 +110,52 @@ class Workspace {
     }
   }
 #else
-  auto ptr() const { return gpu_buf_.ptr(); }
-
-  auto ptr() { return gpu_buf_.ptr(); }
-
-  void AdjustToSize() {
-    if (sz_ != 0) {
-      gpu_buf_ = GPUBuffer(sz_);
-    } else {
-      gpu_buf_ = GPUBuffer();
+    auto ptr() const { return gpu_buf_.ptr(); }
+
+    auto ptr() { return gpu_buf_.ptr(); }
+
+    void AdjustToSize()
+    {
+        if(sz_ != 0)
+        {
+            gpu_buf_ = GPUBuffer(sz_);
+        }
+        else
+        {
+            gpu_buf_ = GPUBuffer();
+        }
     }
-  }
 #endif
 
-  template <typename V>
-  void Write(const V& vec) {
-    using T = typename V::value_type;
-    resize(vec.size() * sizeof(T));
-    auto s = hipMemcpy(this->ptr(), &vec[0], size(),
-        hipMemcpyHostToDevice);
-    if(s != hipSuccess) {
-      abort();
+    template <typename V>
+    void Write(const V& vec)
+    {
+        using T = typename V::value_type;
+        resize(vec.size() * sizeof(T));
+        auto s = hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice);
+        if(s != hipSuccess)
+        {
+            abort();
+        }
     }
-  }
 
-  template <typename V>
-  V Read() const {
-    using T = typename V::value_type;
-    size_t num_elem = size() / sizeof(T);
-    V ret(num_elem);
-    auto s = hipMemcpy(&ret[0], ptr(), size(),
-        hipMemcpyDeviceToHost);
-    if (s != hipSuccess) {
-      abort();
+    template <typename V>
+    V Read() const
+    {
+        using T         = typename V::value_type;
+        size_t num_elem = size() / sizeof(T);
+        V ret(num_elem);
+        auto s = hipMemcpy(&ret[0], ptr(), size(), hipMemcpyDeviceToHost);
+        if(s != hipSuccess)
+        {
+            abort();
+        }
+        return ret;
     }
-    return ret;
-  }
 
 private:
-  // miopen::Handle& handle_;
-  // miopen::Allocator::ManageDataPtr data_{};
-  GPUBuffer gpu_buf_{};
-  size_t sz_{};
-
-
+    // miopen::Handle& handle_;
+    // miopen::Allocator::ManageDataPtr data_{};
+    GPUBuffer gpu_buf_{};
+    size_t sz_{};
 };

From ae5d50e412dfc99180697283e284e957ae678816 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 16:05:13 +0000
Subject: [PATCH 08/14] missed some instances

---
 test/gru_common.hpp | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 89b5d4a18d..4f2072ec8f 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -1982,7 +1982,6 @@ struct verify_forward_infer_gru
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        std::vector<T> workSpace(workspace_size / sizeof(T));
 
         auto input_dev = handle.Write(input);
 
@@ -1995,7 +1994,9 @@ struct verify_forward_infer_gru
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev = handle.Write(workSpace);
+
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
@@ -2028,8 +2029,8 @@ struct verify_forward_infer_gru
                                   ((nohy) ? nullptr : hy_dev.get()),
                                   &hiddenDesc,
                                   nullptr,
-                                  workSpace_dev.get(),
-                                  workspace_size);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_GRU_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2267,11 +2268,13 @@ struct verify_forward_train_gru
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{};
+        wspace.resize(workspace_size);
+
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-
-        std::vector<T> workSpace(workspace_size / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
+        Workspace rspace{};
+        rspace.resize(reserveSpaceSize);
 
         auto input_dev = handle.Write(input);
 
@@ -2285,9 +2288,6 @@ struct verify_forward_train_gru
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -2319,10 +2319,10 @@ struct verify_forward_train_gru
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  nullptr,
-                                 workSpace_dev.get(),
-                                 workspace_size,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_GRU_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2335,7 +2335,7 @@ struct verify_forward_train_gru
         auto retSet = std::make_tuple(
             handle.Read<T>(output_dev, output.size()),
             (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)));
+            rspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();

From 83d622b849c16c5d888b4275e95aa2e10e081d4e Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 17:33:15 +0000
Subject: [PATCH 09/14] bring back the workspace round up logic

---
 test/gru_common.hpp         |  6 ++++--
 test/lstm_common.hpp        | 11 +++++++----
 test/rnn_vanilla_common.hpp |  6 ++++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 4f2072ec8f..c86fe15e47 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -1981,6 +1981,8 @@ struct verify_forward_infer_gru
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{};
+        wspace.resize(workspace_size);
 
 
         auto input_dev = handle.Write(input);
@@ -1995,8 +1997,6 @@ struct verify_forward_infer_gru
         auto hy_dev = handle.Write(hy);
 
 
-        Workspace wspace{};
-        wspace.resize(workspace_size);
 
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
@@ -2273,6 +2273,8 @@ struct verify_forward_train_gru
 
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
+        reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1);
+        assert(reserveSpaceSize % sizeof(T) == 0);
         Workspace rspace{};
         rspace.resize(reserveSpaceSize);
 
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index be81761132..17217c763d 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -917,14 +917,15 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
         auto output_dev = handle.Write(output);
 
         size_t workspace_size   = 0;
-        size_t reserveSpaceSize = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{};
+        wspace.resize(workspace_size);
+
+        size_t reserveSpaceSize = 0;
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-
-        Workspace wspace{};
+        reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
         Workspace rspace{};
-        wspace.resize(workspace_size);
         rspace.resize(reserveSpaceSize);
 
         auto weights_dev = handle.Write(weights);
@@ -1235,6 +1236,8 @@ verify_backward_data_lstm<T>::gpu() const
     size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
+    reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
+
     if(reserveSpaceSize != (RSVgpu.size() * sizeof(T)))
     {
         std::abort();
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index ce283bddde..9b8d89e908 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -1707,11 +1707,13 @@ struct verify_forward_train_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        Workspace wspace{};
+        wspace.resize(workSpaceSize);
+
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
+        reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
 
-        Workspace wspace{};
-        wspace.resize(workSpaceSize);
         Workspace rspace{};
         rspace.resize(reserveSpaceSize);
 

From 4bd42530031b68311c1a65439cccca826c556ef3 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Wed, 15 Nov 2023 17:33:35 +0000
Subject: [PATCH 10/14] formatting

---
 test/gru_common.hpp  | 10 +++-------
 test/lstm_common.hpp |  2 +-
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index c86fe15e47..56666795fb 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -1984,7 +1984,6 @@ struct verify_forward_infer_gru
         Workspace wspace{};
         wspace.resize(workspace_size);
 
-
         auto input_dev = handle.Write(input);
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
@@ -1996,8 +1995,6 @@ struct verify_forward_infer_gru
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -2334,10 +2331,9 @@ struct verify_forward_train_gru
         }
 #endif
 
-        auto retSet = std::make_tuple(
-            handle.Read<T>(output_dev, output.size()),
-            (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            rspace.Read<std::vector<T>>());
+        auto retSet = std::make_tuple(handle.Read<T>(output_dev, output.size()),
+                                      (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
+                                      rspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index 17217c763d..a319fb3b7a 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -916,7 +916,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
         std::fill(output.begin(), output.end(), static_cast<T>(0));
         auto output_dev = handle.Write(output);
 
-        size_t workspace_size   = 0;
+        size_t workspace_size = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
         Workspace wspace{};
         wspace.resize(workspace_size);

From 6eef5fe3bcbc55a08c009e50a64d0a8181534acc Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Thu, 16 Nov 2023 13:46:57 +0000
Subject: [PATCH 11/14] fix hip tidy issues. add more checks

---
 test/lstm_common.hpp |  6 +++++-
 test/workspace.hpp   | 16 ++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index a319fb3b7a..75477a4c88 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -1092,7 +1092,10 @@ verify_backward_data_lstm<T>::cpu() const
         reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T);
     }
 
-    std::vector<T> reserveSpace(RSVcpu.begin(), RSVcpu.begin() + reserveSpaceSize);
+    if (reserveSpaceSize != RSVcpu.size()) {
+      std::abort();
+    }
+    std::vector<T> reserveSpace(RSVcpu);
 
     std::vector<T> converted_dinput;
     std::vector<T> converted_output;
@@ -1298,6 +1301,7 @@ verify_backward_data_lstm<T>::gpu() const
                           rspace.ptr(),
                           rspace.size());
 
+    assert(RSVgpu.size() * sizeof(T) == rspace.size());
     RSVgpu = rspace.Read<std::vector<T>>();
     // TODO: remove workSpace
     auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
diff --git a/test/workspace.hpp b/test/workspace.hpp
index b60db33d7e..33f6656e09 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -42,7 +42,7 @@ class Workspace
         {
             assert(num_bytes > 0);
             auto s = hipMalloc(&buf_, num_bytes);
-            if(s != hipSuccess || !buf_)
+            if(s != hipSuccess || buf_ == nullptr)
             {
                 std::abort();
             }
@@ -64,7 +64,7 @@ class Workspace
         GPUBuffer(const GPUBuffer&) = delete;
         GPUBuffer& operator=(const GPUBuffer&) = delete;
 
-        GPUBuffer(GPUBuffer&& that) : buf_(std::move(that.buf_))
+        GPUBuffer(GPUBuffer&& that) : buf_(that.buf_)
         {
             that.buf_ = nullptr; // take over ownership
         }
@@ -99,17 +99,6 @@ class Workspace
     // passed to capture the size. Must call AdjustToSize() after calling such a method
     size_t* SizePtr() { return &sz_; }
 
-#if 0
-  auto ptr() const { return data_.get(); }
-  auto ptr() { return data_.get(); }
-  void AdjustToSize() {
-    if (sz_ == 0) {
-      data_.reset();
-    } else {
-      data_ = handle_.Create(sz_);
-    }
-  }
-#else
     auto ptr() const { return gpu_buf_.ptr(); }
 
     auto ptr() { return gpu_buf_.ptr(); }
@@ -125,7 +114,6 @@ class Workspace
             gpu_buf_ = GPUBuffer();
         }
     }
-#endif
 
     template <typename V>
     void Write(const V& vec)

From f22aa2d0b9d821ed6595984bb99bd350cb6caac8 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Thu, 16 Nov 2023 17:14:53 +0000
Subject: [PATCH 12/14] fix a bug with zeroing out rnn workspace

---
 test/lstm_common.hpp | 24 ++++++++++++++++++------
 test/workspace.hpp   | 21 ++++++++++++++++-----
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index 75477a4c88..bce643726b 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -977,7 +977,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             printf("GPU outdata[%d]: %f\n", i, outdata[i]);
         }
 #endif
-        RSVgpu = rspace.Read<std::vector<T>>();
+        rspace.ReadTo(RSVgpu);
 
         std::vector<T> output_gpu = handle.Read<T>(output_dev, output.size());
 
@@ -1092,8 +1092,9 @@ verify_backward_data_lstm<T>::cpu() const
         reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T);
     }
 
-    if (reserveSpaceSize != RSVcpu.size()) {
-      std::abort();
+    if(reserveSpaceSize != RSVcpu.size())
+    {
+        std::abort();
     }
     std::vector<T> reserveSpace(RSVcpu);
 
@@ -1233,12 +1234,23 @@ verify_backward_data_lstm<T>::gpu() const
 
     size_t workspace_size = 0;
     miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+    if(workspace_size % sizeof(T) != 0)
+    {
+        std::abort();
+    }
     Workspace wspace{};
-    wspace.resize(workspace_size);
+    // Needed to zero out the workspace (happens in std::vector's constructor)
+    // or else this test fails verification when workspace is compared against the
+    // workspace returned by ::cpu method in this class
+    wspace.Write(std::vector<T>(workspace_size / sizeof(T)));
+    // wspace.resize(workspace_size);
 
     size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
+    /// \todo: fix miopenGetRNNTrainingReserveSize to return a multiple of
+    /// sizeof(T)
+    // Needed because reserveSpaceSize returned is not a multiple of sizeof(T).
     reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
 
     if(reserveSpaceSize != (RSVgpu.size() * sizeof(T)))
@@ -1302,7 +1314,7 @@ verify_backward_data_lstm<T>::gpu() const
                           rspace.size());
 
     assert(RSVgpu.size() * sizeof(T) == rspace.size());
-    RSVgpu = rspace.Read<std::vector<T>>();
+    rspace.ReadTo(RSVgpu);
     // TODO: remove workSpace
     auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                   (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
@@ -1831,7 +1843,7 @@ struct lstm_basic_driver : test_driver
 #if(MIO_LSTM_TEST_DEBUG > 0)
         printf("Running backward weights LSTM.\n");
         printf("reserve sz: %d, workSpace sz: %d, weight sz: %d\n",
-               reserveSpaceBwdData.size(),
+               rsvcpu.size(),
                workSpaceBwdData.size(),
                wei_sz);
         fflush(nullptr);
diff --git a/test/workspace.hpp b/test/workspace.hpp
index 33f6656e09..04ffc2be39 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -128,16 +128,27 @@ class Workspace
     }
 
     template <typename V>
-    V Read() const
+    void ReadTo(V& vec) const
     {
-        using T         = typename V::value_type;
-        size_t num_elem = size() / sizeof(T);
-        V ret(num_elem);
-        auto s = hipMemcpy(&ret[0], ptr(), size(), hipMemcpyDeviceToHost);
+        using T = typename V::value_type;
+        if(vec.size() * sizeof(T) != size())
+        {
+            std::abort();
+        }
+        auto s = hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost);
         if(s != hipSuccess)
         {
             abort();
         }
+    }
+
+    template <typename V>
+    V Read() const
+    {
+        using T         = typename V::value_type;
+        size_t num_elem = size() / sizeof(T);
+        V ret(num_elem);
+        ReadTo(ret);
         return ret;
     }
 

From d6ae39fb5b551f3bba9964c43cc24a6db3654640 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Thu, 16 Nov 2023 23:19:50 +0000
Subject: [PATCH 13/14] fix hip-tidy error

---
 test/workspace.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/workspace.hpp b/test/workspace.hpp
index 04ffc2be39..3f9e8aae1b 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -64,12 +64,12 @@ class Workspace
         GPUBuffer(const GPUBuffer&) = delete;
         GPUBuffer& operator=(const GPUBuffer&) = delete;
 
-        GPUBuffer(GPUBuffer&& that) : buf_(that.buf_)
+        GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_)
         {
             that.buf_ = nullptr; // take over ownership
         }
 
-        GPUBuffer& operator=(GPUBuffer&& that)
+        GPUBuffer& operator=(GPUBuffer&& that) noexcept
         {
             std::swap(this->buf_, that.buf_);
             return *this;

From 98ec151e8c163a553cb6fa4342f919c775e00b30 Mon Sep 17 00:00:00 2001
From: "M. Amber Hassaan" <amber_474@yahoo.com>
Date: Tue, 21 Nov 2023 18:51:15 +0000
Subject: [PATCH 14/14] address review comments

---
 test/conv_common.hpp        |  6 +--
 test/ctc.cpp                |  3 +-
 test/find_2_conv.cpp        |  6 +--
 test/find_db.cpp            | 12 ++----
 test/gru_common.hpp         | 12 ++----
 test/lstm_common.hpp        |  9 ++--
 test/main.cpp               |  3 +-
 test/rnn_vanilla_common.hpp | 12 ++----
 test/tensor_reorder.cpp     |  3 +-
 test/workspace.hpp          | 85 +++++++++++++++++++------------------
 10 files changed, 65 insertions(+), 86 deletions(-)

diff --git a/test/conv_common.hpp b/test/conv_common.hpp
index 7f595facb4..e387e98b5b 100644
--- a/test/conv_common.hpp
+++ b/test/conv_common.hpp
@@ -331,8 +331,7 @@ struct conv_base
             EXPECT_EQUAL(miopenStatusSuccess,
                          miopenGetSolutionWorkspaceSize(solution, &workspace_size));
 
-            Workspace wspace{};
-            wspace.resize(workspace_size);
+            Workspace wspace{workspace_size};
 
             EXPECT_EQUAL(
                 miopenStatusSuccess,
@@ -1729,8 +1728,7 @@ struct verify_forward_conv_int8 : conv_base<T>
                                     wei_vpad_dev.get());
         }
 
-        Workspace wspace{};
-        wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         int ret_algo_count;
         miopenConvAlgoPerf_t perf;
diff --git a/test/ctc.cpp b/test/ctc.cpp
index f53a6f04e3..1c759220f2 100644
--- a/test/ctc.cpp
+++ b/test/ctc.cpp
@@ -652,8 +652,7 @@ struct verify_ctcloss
                                                                    inputLengths.data(),
                                                                    miopenCTCLossAlgo_t(0));
 
-        Workspace wspace{};
-        wspace.resize(workSpaceSize);
+        Workspace wspace{workSpaceSize};
 
         auto losses_gpu = losses;
         auto grads_gpu  = grads;
diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp
index 4e1433b1e5..6e636e265f 100644
--- a/test/find_2_conv.cpp
+++ b/test/find_2_conv.cpp
@@ -211,8 +211,7 @@ struct Find2Test : test_driver
                 }
 
                 const auto workspace_size = std::min(workspace_limit, workspace_max);
-                Workspace wspace{};
-                wspace.resize(workspace_size);
+                Workspace wspace{workspace_size};
 
                 EXPECT_EQUAL(
                     miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()),
@@ -316,8 +315,7 @@ struct Find2Test : test_driver
         EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size),
                      miopenStatusSuccess);
 
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) {
             auto arguments = std::make_unique<miopenTensorArgument_t[]>(num_arguments);
diff --git a/test/find_db.cpp b/test/find_db.cpp
index 3977f9e220..f37b6e8595 100644
--- a/test/find_db.cpp
+++ b/test/find_db.cpp
@@ -107,9 +107,7 @@ struct FindDbTest : test_driver
         const auto ctx = ExecutionContext{&handle};
         const auto problem =
             conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -140,9 +138,7 @@ struct FindDbTest : test_driver
         const auto ctx = ExecutionContext{&handle};
         const auto problem =
             conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -173,9 +169,7 @@ struct FindDbTest : test_driver
         const auto ctx     = ExecutionContext{&handle};
         const auto problem = conv::ProblemDescription{
             y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 56666795fb..4df6209e9d 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -1981,8 +1981,7 @@ struct verify_forward_infer_gru
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         auto input_dev = handle.Write(input);
 
@@ -2265,15 +2264,13 @@ struct verify_forward_train_gru
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
         reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1);
         assert(reserveSpaceSize % sizeof(T) == 0);
-        Workspace rspace{};
-        rspace.resize(reserveSpaceSize);
+        Workspace rspace{reserveSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -2578,8 +2575,7 @@ struct verify_backward_data_gru
 
         size_t workspace_size = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
         auto yin_dev     = handle.Write(yin);
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index bce643726b..569927883e 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -552,8 +552,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         size_t workspace_size = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         auto input_dev = handle.Write(input);
 
@@ -918,15 +917,13 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
 
         size_t workspace_size = 0;
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
-        Workspace wspace{};
-        wspace.resize(workspace_size);
+        Workspace wspace{workspace_size};
 
         size_t reserveSpaceSize = 0;
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
         reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
-        Workspace rspace{};
-        rspace.resize(reserveSpaceSize);
+        Workspace rspace{reserveSpaceSize};
 
         auto weights_dev = handle.Write(weights);
 
diff --git a/test/main.cpp b/test/main.cpp
index 1cd0a402b8..a1efb2eff8 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -174,8 +174,7 @@ struct conv_forward : output_tensor_fixture
         STATUS(miopenConvolutionForwardGetWorkSpaceSize(
             handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace));
 
-        Workspace wspace{};
-        wspace.resize(sz_fwd_workspace);
+        Workspace wspace{sz_fwd_workspace};
 
         std::vector<float> in(sz_in);
         std::vector<float> wei(sz_wei);
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index 9b8d89e908..854f682068 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -1437,8 +1437,7 @@ struct verify_forward_infer_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        Workspace wspace{};
-        wspace.resize(workSpaceSize);
+        Workspace wspace{workSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -1707,15 +1706,13 @@ struct verify_forward_train_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        Workspace wspace{};
-        wspace.resize(workSpaceSize);
+        Workspace wspace{workSpaceSize};
 
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
         reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
 
-        Workspace rspace{};
-        rspace.resize(reserveSpaceSize);
+        Workspace rspace{reserveSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -2001,8 +1998,7 @@ struct verify_backward_data_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        Workspace wspace{};
-        wspace.resize(workSpaceSize);
+        Workspace wspace{workSpaceSize};
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
         auto yin_dev  = handle.Write(yin);
diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp
index b4a7208798..bf40e7ee38 100644
--- a/test/tensor_reorder.cpp
+++ b/test/tensor_reorder.cpp
@@ -398,8 +398,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
             EXPECT(reorder_sol != nullptr);
             size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
                                                                : reorder_sol->GetOutputTensorSize();
-            Workspace wspace{};
-            wspace.resize(workspace_size);
+            Workspace wspace{workspace_size};
 
             auto src_dev = handle.Write(t_src.data);
 
diff --git a/test/workspace.hpp b/test/workspace.hpp
index 3f9e8aae1b..93522b1cd4 100644
--- a/test/workspace.hpp
+++ b/test/workspace.hpp
@@ -29,6 +29,12 @@
 
 #include "get_handle.hpp"
 
+#define HIP_CHECK(exp)                                                                 \
+    if((exp) != hipSuccess)                                                            \
+    {                                                                                  \
+        MIOPEN_LOG_E(#exp "failed at line: " << __LINE__ << " in file: " << __FILE__); \
+    }
+
 class Workspace
 {
 
@@ -38,49 +44,69 @@ class Workspace
     public:
         GPUBuffer() = default;
 
-        explicit GPUBuffer(size_t num_bytes)
+        explicit GPUBuffer(size_t num_bytes) : sz_(num_bytes)
         {
-            assert(num_bytes > 0);
-            auto s = hipMalloc(&buf_, num_bytes);
-            if(s != hipSuccess || buf_ == nullptr)
+            if(num_bytes > 0)
             {
-                std::abort();
+                HIP_CHECK(hipMalloc(&buf_, num_bytes));
+                assert(buf_ != nullptr);
             }
-        }
-
-        ~GPUBuffer()
-        {
-            auto s = hipFree(buf_);
-            buf_   = nullptr;
-            if(s != hipSuccess)
+            else
             {
-                std::abort();
+                buf_ = nullptr;
             }
         }
 
+        ~GPUBuffer() { FreeBuf(); }
+
         void* ptr() { return buf_; }
         void* ptr() const { return buf_; }
 
+        auto size() const { return sz_; }
+
         GPUBuffer(const GPUBuffer&) = delete;
         GPUBuffer& operator=(const GPUBuffer&) = delete;
 
-        GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_)
+        GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_), sz_(that.sz_)
         {
             that.buf_ = nullptr; // take over ownership
+            that.sz_  = 0;
         }
 
         GPUBuffer& operator=(GPUBuffer&& that) noexcept
         {
+            FreeBuf();
             std::swap(this->buf_, that.buf_);
+            std::swap(this->sz_, that.sz_);
             return *this;
         }
 
     private:
+        void FreeBuf()
+        {
+            HIP_CHECK(hipFree(buf_));
+            buf_ = nullptr;
+            sz_  = 0;
+        }
+
         void* buf_ = nullptr;
+        size_t sz_ = 0;
     };
 
+    // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
+    // passed to capture the size. Must call AdjustToSize() after calling such a method
+    size_t* SizePtr() { return &sz_; }
+
+    void AdjustToSize()
+    {
+        if(sz_ != gpu_buf_.size())
+        {
+            gpu_buf_ = GPUBuffer(sz_);
+        }
+    }
+
 public:
-    Workspace() = default;
+    explicit Workspace(size_t sz = 0) : sz_(sz) { AdjustToSize(); }
 
     Workspace(const Workspace&) = delete;
     Workspace& operator=(const Workspace&) = delete;
@@ -95,36 +121,16 @@ class Workspace
         AdjustToSize();
     }
 
-    // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
-    // passed to capture the size. Must call AdjustToSize() after calling such a method
-    size_t* SizePtr() { return &sz_; }
-
     auto ptr() const { return gpu_buf_.ptr(); }
 
     auto ptr() { return gpu_buf_.ptr(); }
 
-    void AdjustToSize()
-    {
-        if(sz_ != 0)
-        {
-            gpu_buf_ = GPUBuffer(sz_);
-        }
-        else
-        {
-            gpu_buf_ = GPUBuffer();
-        }
-    }
-
     template <typename V>
     void Write(const V& vec)
     {
         using T = typename V::value_type;
         resize(vec.size() * sizeof(T));
-        auto s = hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice);
-        if(s != hipSuccess)
-        {
-            abort();
-        }
+        HIP_CHECK(hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice));
     }
 
     template <typename V>
@@ -133,13 +139,10 @@ class Workspace
         using T = typename V::value_type;
         if(vec.size() * sizeof(T) != size())
         {
+            MIOPEN_LOG_E("vector of wrong size passed");
             std::abort();
         }
-        auto s = hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost);
-        if(s != hipSuccess)
-        {
-            abort();
-        }
+        HIP_CHECK(hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost));
     }
 
     template <typename V>