From 5d9d7fcdb442ea82d2da19a15a98eca1349815e1 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 1 Nov 2023 14:25:21 +0000 Subject: [PATCH 01/14] added checks on workspace params --- src/ocl/convolutionocl.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 1bfba9871d..d94819bd21 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -131,6 +131,16 @@ static inline void ValidateGroupCount(const TensorDescriptor& x, MIOPEN_THROW(miopenStatusBadParm, "Invalid group number"); } +static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) { + if (!workSpace && workSpaceSize != 0) { + MIOPEN_THROW(miopenStatusBadParm, "workspace size is > 0 but ptr is null"); + } + if (workSpace && workSpaceSize == 0) { + MIOPEN_THROW(miopenStatusBadParm, "workspace size is 0 but ptr is non-null"); + } + /// \todo could add a check here that workSpace points to GPU memory +} + static Invoker PrepareInvoker(ExecutionContext ctx, const conv::ProblemDescription& problem, const NetworkConfig& config, @@ -260,6 +270,7 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); if(x == nullptr || w == nullptr || y == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); @@ -494,6 +505,7 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, Data_t workSpace, size_t workSpaceSize) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; @@ -807,6 +819,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle, const std::size_t workSpaceSize, const solver::Id solver_id) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; @@ -841,6 +854,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); if(dx == nullptr || w == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); @@ -937,6 +951,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle, Data_t workSpace, size_t workSpaceSize) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); if(!(dyDesc.IsPacked() && wDesc.IsPacked() && dxDesc.IsPacked())) @@ -1009,6 +1024,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, std::size_t workSpaceSize, solver::Id solver_id) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; @@ -1049,6 +1065,7 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); if(x == nullptr || dw == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); @@ -1144,6 +1161,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle, Data_t workSpace, size_t workSpaceSize) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); @@ -1207,6 +1225,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, std::size_t workSpaceSize, solver::Id solver_id) const { + ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); From 75e18bb7c13e69a4ef3608ffee114f2703bccbdb Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 8 Nov 2023 15:57:40 +0000 Subject: [PATCH 02/14] addressed review comments --- src/ocl/convolutionocl.cpp | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index d94819bd21..413e11d789 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -131,14 +131,16 @@ static inline void ValidateGroupCount(const TensorDescriptor& x, MIOPEN_THROW(miopenStatusBadParm, "Invalid group number"); } -static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) { - if (!workSpace && workSpaceSize != 0) { - MIOPEN_THROW(miopenStatusBadParm, "workspace size is > 0 but ptr is null"); - } - if (workSpace && workSpaceSize == 0) { - MIOPEN_THROW(miopenStatusBadParm, "workspace size is 0 but ptr is non-null"); - } - /// \todo could add a check here that workSpace points to GPU memory +static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) +{ + + bool x = (workSpace != nullptr); + bool y = (workSpaceSize != 0); + + assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. Either both should " + "be zero or both should be non-zero"); + + /// \todo could add a check here that workSpace points to GPU memory } static Invoker PrepareInvoker(ExecutionContext ctx, @@ -270,8 +272,8 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || w == nullptr || y == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -505,8 +507,8 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, Data_t workSpace, size_t workSpaceSize) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -819,8 +821,8 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle, const std::size_t workSpaceSize, const solver::Id solver_id) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -854,8 +856,8 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(dx == nullptr || w == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -951,8 +953,8 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle, Data_t workSpace, size_t workSpaceSize) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(!(dyDesc.IsPacked() && wDesc.IsPacked() && dxDesc.IsPacked())) { @@ -1024,8 +1026,8 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, std::size_t workSpaceSize, solver::Id solver_id) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; ValidateTensors(tensors); @@ -1065,8 +1067,8 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, size_t workSpaceSize, bool exhaustiveSearch) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || dw == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -1161,8 +1163,8 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle, Data_t workSpace, size_t workSpaceSize) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); @@ -1225,8 +1227,8 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, std::size_t workSpaceSize, solver::Id solver_id) const { - ValidateWorkspace(workSpace, workSpaceSize); MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); From 9bb0750a7fcdada6a8897b30063d14e258ccb851 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Thu, 9 Nov 2023 00:28:22 +0000 Subject: [PATCH 03/14] fix release build warning --- src/ocl/convolutionocl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index eab7649c20..2827ce62a8 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -134,8 +134,8 @@ static inline void ValidateGroupCount(const TensorDescriptor& x, static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) { - bool x = (workSpace != nullptr); - bool y = (workSpaceSize != 0); + [[maybe_unused]] bool x = (workSpace != nullptr); + [[maybe_unused]] bool y = (workSpaceSize != 0); assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. Either both should " "be zero or both should be non-zero"); From 675b6a9e172d03712d79868a48353d5b0cb97b88 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Sat, 11 Nov 2023 15:50:22 +0000 Subject: [PATCH 04/14] WIP: workspace abstraction --- test/conv_common.hpp | 97 ++++++--------- test/ctc.cpp | 9 +- test/find_2_conv.cpp | 14 +-- test/find_db.cpp | 27 ++--- test/gtest/conv_api_strided_tensors.cpp | 16 +-- test/gtest/solver_bwd.hpp | 11 +- test/gtest/solver_fwd.hpp | 11 +- test/gtest/solver_wrw.hpp | 11 +- test/pooling_common.hpp | 17 +-- test/reduce_test.cpp | 57 +++++---- test/rnn_vanilla_common.hpp | 60 +++++----- test/workspace.hpp | 149 ++++++++++++++++++++++++ 12 files changed, 303 insertions(+), 176 deletions(-) create mode 100644 test/workspace.hpp diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 60291b8d48..58904c2b65 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -25,6 +25,7 @@ *******************************************************************************/ #pragma once #include "test.hpp" +#include "workspace.hpp" #include #include #include @@ -320,13 +321,12 @@ struct conv_base EXPECT_EQUAL(miopenStatusSuccess, miopenGetSolutionWorkspaceSize(solution, &workspace_size)); - const auto workspace_dev = workspace_size != 0 - ? get_handle().Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); EXPECT_EQUAL(miopenStatusSuccess, miopenRunSolution( - handle, solution, 3, arguments, workspace_dev.get(), workspace_size)); + handle, solution, 3, arguments, wspace.ptr(), wspace.size())); } const auto& solution_deref = miopen::deref(solutions.front()); @@ -584,19 +584,6 @@ struct verify_forward_conv : conv_base return rout; } - void resize_workspace(miopen::Handle& h, - const std::size_t sz, - std::vector& ws, - miopen::Allocator::ManageDataPtr& ws_dev) const - { - ws_dev.reset(); - if(sz > 0) - { - ws.resize(sz); - ws_dev = h.Write(ws); - } - } - tensor gpu() { auto&& handle = get_handle(); @@ -617,8 +604,7 @@ struct verify_forward_conv : conv_base bool fallback_path_taken = false; std::size_t count = 0; - std::vector ws; - miopen::Allocator::ManageDataPtr ws_dev = nullptr; + Workspace wspace{}; const auto ctx = ExecutionContext{&handle}; const auto problem = ConvProblemDescription{ @@ -638,7 +624,7 @@ struct verify_forward_conv : conv_base int ret_algo_count; miopenConvAlgoPerf_t perf; const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(workspace_size); filter.FindConvBwdDataAlgorithm(handle, input.desc, @@ -650,8 +636,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -682,7 +668,7 @@ struct verify_forward_conv : conv_base std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size << " != " << ws_size << std::endl; } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -693,8 +679,8 @@ struct verify_forward_conv : conv_base wei_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -704,7 +690,7 @@ struct verify_forward_conv : conv_base int ret_algo_count; miopenConvAlgoPerf_t perf; const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(workspace_size); filter.FindConvFwdAlgorithm(handle, input.desc, @@ -716,8 +702,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -749,7 +735,7 @@ struct verify_forward_conv : conv_base std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size << " != " << ws_size << std::endl; } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -760,8 +746,8 @@ struct verify_forward_conv : conv_base in_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } break; @@ -817,8 +803,7 @@ struct verify_forward_conv : conv_base if(api == ConvApi::Find_1_0) { const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(workspace_size); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -835,15 +820,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -856,8 +839,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -889,8 +872,7 @@ struct verify_forward_conv : conv_base if(api == ConvApi::Find_1_0) { const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(workspace_size); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -909,15 +891,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -930,8 +910,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -945,15 +925,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -966,8 +944,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1118,8 +1096,7 @@ struct verify_backward_conv : conv_base { case ConvApi::Immediate: { const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(workspace_size); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1138,8 +1115,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); diff --git a/test/ctc.cpp b/test/ctc.cpp index 6393728957..e9e1713668 100644 --- a/test/ctc.cpp +++ b/test/ctc.cpp @@ -29,6 +29,7 @@ #include "tensor_holder.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "rnn_util.hpp" #include "random.hpp" #include @@ -633,8 +634,8 @@ struct verify_ctcloss inputLengths.data(), miopenCTCLossAlgo_t(0)); - auto workSpace = tensor{workSpaceSize / sizeof(T)}; - auto workSpace_dev = handle.Write(workSpace.data); + Workspace wspace{}; + wspace.resize(workSpaceSize); auto losses_gpu = losses; auto grads_gpu = grads; @@ -653,8 +654,8 @@ struct verify_ctcloss grads.desc, grads_dev.get(), miopenCTCLossAlgo_t(0), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); losses_gpu.data = handle.Read(losses_dev, losses_gpu.data.size()); grads_gpu.data = handle.Read(grads_dev, grads_gpu.data.size()); diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index a89ce942a7..9148436ec7 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include @@ -210,12 +211,11 @@ struct Find2Test : test_driver } const auto workspace_size = std::min(workspace_limit, workspace_max); - workspace_dev = workspace_size != 0 - ? miopen::deref(handle).Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace( - options, workspace_dev.get(), workspace_size), + options, wspace.ptr(), wspace.size()), miopenStatusSuccess); EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor( @@ -318,8 +318,8 @@ struct Find2Test : test_driver EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), miopenStatusSuccess); - auto workspace_dev = - workspace_size != 0 ? handle_deref.Write(std::vector(workspace_size)) : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) { auto arguments = std::make_unique(num_arguments); @@ -333,7 +333,7 @@ struct Find2Test : test_driver EXPECT_EQUAL( miopenRunSolution( - handle, solution, 3, arguments.get(), workspace_dev.get(), workspace_size), + handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()), miopenStatusSuccess); }; diff --git a/test/find_db.cpp b/test/find_db.cpp index b5dd4b83d6..974136bc59 100644 --- a/test/find_db.cpp +++ b/test/find_db.cpp @@ -107,9 +107,8 @@ struct FindDbTest : test_driver const auto problem = conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData}; const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); auto filterCall = [&]() { int ret_algo_count; @@ -125,8 +124,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -141,9 +140,8 @@ struct FindDbTest : test_driver const auto problem = conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward}; const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); auto filterCall = [&]() { int ret_algo_count; @@ -159,8 +157,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -175,9 +173,8 @@ struct FindDbTest : test_driver const auto problem = conv::ProblemDescription{ y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights}; const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{}; + wspace.resize(workspace_size); auto filterCall = [&]() { int ret_algo_count; @@ -193,8 +190,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp index 9a2876b3f0..04f3ccbf19 100644 --- a/test/gtest/conv_api_strided_tensors.cpp +++ b/test/gtest/conv_api_strided_tensors.cpp @@ -29,6 +29,7 @@ #include #include #include "platform.hpp" +#include "../workspace.hpp" #define MIOPEN_CHECK_RET(val) ASSERT_EQ(val, miopenStatusSuccess) @@ -66,10 +67,12 @@ class ConvStridedTensors : public ::testing::Test MIOPEN_CHECK_RET(miopenSetConvolutionGroupCount(conv_descr, 1)); // Workspace + size_t sz = 0; MIOPEN_CHECK_RET(miopenConvolutionForwardGetWorkSpaceSize( - handle, filter_descr, input_descr, conv_descr, output_descr, &workspace_size)); + handle, filter_descr, input_descr, conv_descr, output_descr, &sz)); // Data + wspace.resize(sz); h_input.resize(input_size); h_filter.resize(filter_size); h_output.resize(output_size); @@ -125,7 +128,7 @@ class ConvStridedTensors : public ::testing::Test std::vector dilation = {1, 1, 1}; // Workspace - size_t workspace_size; + Workspace wspace{}; // Data const size_t input_size = input_dims[0] * input_strides[0]; @@ -143,7 +146,6 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) { auto device = Device(handle); - auto d_workspace = device.Malloc(workspace_size); auto d_input = device.Malloc(input_bytes); auto d_filter = device.Malloc(filter_bytes); auto d_output = device.Malloc(output_bytes); @@ -168,8 +170,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) sizeof(perf_results) / sizeof(perf_results[0]), &perf_results_count, perf_results, - d_workspace.Data(), - workspace_size, + wspace.ptr(), + wspace.size(), true), miopenStatusSuccess); ASSERT_GT(perf_results_count, 0); @@ -189,8 +191,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) &beta, output_descr, d_output.Data(), - d_workspace.Data(), - workspace_size), + wspace.ptr(), + wspace.size()), miopenStatusSuccess); ASSERT_TRUE(device.Synchronize()); } diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp index cb55a5951b..988368e3c0 100644 --- a/test/gtest/solver_bwd.hpp +++ b/test/gtest/solver_bwd.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvBwdSolverTest @@ -76,14 +77,13 @@ struct ConvBwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } const auto invoke_params = miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); @@ -177,8 +177,7 @@ struct ConvBwdSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp index 88fa9a9c55..a286cc0bbb 100644 --- a/test/gtest/solver_fwd.hpp +++ b/test/gtest/solver_fwd.hpp @@ -38,6 +38,7 @@ #include "conv_test_base.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvFwdSolverTest @@ -77,14 +78,13 @@ struct ConvFwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } const auto invoke_params = miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; // auto sol = solv.GetSolution(ctx, problem); @@ -116,8 +116,7 @@ struct ConvFwdSolverTest } ConvTestCase conv_config; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; miopenTensorLayout_t tensor_layout; diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp index dde92e2071..097c9c6122 100644 --- a/test/gtest/solver_wrw.hpp +++ b/test/gtest/solver_wrw.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvWrwSolverTest @@ -76,14 +77,13 @@ struct ConvWrwSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } const auto invoke_params = miopen::conv::WrWInvokeParams{tensors, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); @@ -179,8 +179,7 @@ struct ConvWrwSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index 9058d34321..f5e60f5163 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -46,6 +46,7 @@ #include "tensor_holder.hpp" #include "verify.hpp" #include "cpu_conv.hpp" +#include "workspace.hpp" #define TEST_PADDING_MODE 0 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) @@ -201,7 +202,8 @@ struct verify_forward_pooling auto in_dev = handle.Write(input.data); auto out_dev = handle.Create(out.data.size()); - auto workspace_dev = handle.Write(indices); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Forward(handle, @@ -212,10 +214,10 @@ struct verify_forward_pooling out.desc, out_dev.get(), true, - workspace_dev.get(), - indices.size() * sizeof(Index)); + wspace.ptr(), + wspace.size()); - indices = handle.Read(workspace_dev, indices.size()); + indices = wspace.Read(); out.data = handle.Read(out_dev, out.data.size()); return out; } @@ -403,9 +405,8 @@ struct verify_backward_pooling auto out_dev = handle.Write(out.data); auto din_dev = handle.Create(dinput.data.size()); - // std::vector workspace(filter.GetWorkSpaceSize(out.desc)); - // auto workspace_dev = handle.Write(workspace); - auto workspace_dev = handle.Write(indices); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Backward(handle, @@ -423,7 +424,7 @@ struct verify_backward_pooling // dx dinput.desc, din_dev.get(), - workspace_dev.get()); + wspace.prt()); dinput.data = handle.Read(din_dev, dinput.data.size()); return dinput; diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index bb2aa17e07..fd4e48ce4f 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -27,6 +27,7 @@ #include "driver.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "get_handle.hpp" #include "tensor_holder.hpp" #include "random.hpp" @@ -60,7 +61,7 @@ struct verify_reduce_with_indices miopenReduceTensorIndices_t indicesOpt; miopenIndicesType_t indicesType; - verify_reduce_with_indices( // NOLINT (hicpp-member-init) + verify_reduce_with_indices( const miopen::ReduceTensorDescriptor& reduce_, const tensor& input_, const tensor& output_, @@ -68,20 +69,20 @@ struct verify_reduce_with_indices const tensor& indices_, float alpha_, float beta_) + : + reduce(reduce_), + input(input_), + output(output_), + workspace(workspace_), + indices(indices_), + alpha(alpha_), + beta(beta_), + reduceOp(reduce.reduceTensorOp_) + compTypeVal(reduce.reduceTensorCompType_), + nanOpt (reduce.reduceTensorNanOpt_), + indicesOpt (reduce.reduceTensorIndices_), + indicesType(reduce.reduceTensorIndicesType_) { - reduce = reduce_; - input = input_; - output = output_; - workspace = workspace_; - indices = indices_; - alpha = alpha_; - beta = beta_; - - reduceOp = reduce.reduceTensorOp_; - compTypeVal = reduce.reduceTensorCompType_; - nanOpt = reduce.reduceTensorNanOpt_; - indicesOpt = reduce.reduceTensorIndices_; - indicesType = reduce.reduceTensorIndicesType_; } tensor cpu() const @@ -339,10 +340,11 @@ struct verify_reduce_with_indices auto res = output; auto res_indices = indices; - auto indices_dev = handle.Write(indices.data); + Workspace idxspace{}; + idxspace.Write(indices.data); - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); - std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -356,13 +358,11 @@ struct verify_reduce_with_indices if(ws_sizeInBytes > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), indices_dev.get(), indices_sizeInBytes, - workspace_dev.get(), - ws_sizeInBytes, + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), @@ -373,8 +373,8 @@ struct verify_reduce_with_indices else { reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, + idxspace.ptr(), + idxspace.size(), nullptr, 0, alphaPtr, @@ -386,7 +386,7 @@ struct verify_reduce_with_indices }; res.data = handle.Read(output_dev, res.data.size()); - res_indices.data = handle.Read(indices_dev, res_indices.data.size()); + res_indices.data = idxspace.Read(); return (std::make_tuple(res, res_indices)); } @@ -639,7 +639,8 @@ struct verify_reduce_no_indices // replicate auto res = output; - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -653,13 +654,11 @@ struct verify_reduce_no_indices if(ws_sizeInBytes > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), nullptr, 0, - workspace_dev.get(), - ws_sizeInBytes, + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index ef5c1088c3..4d8f6ff1d0 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1436,8 +1437,8 @@ struct verify_forward_infer_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{}; + wspace.resize(workSpaceSize); auto input_dev = handle.Write(input); @@ -1450,8 +1451,6 @@ struct verify_forward_infer_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1479,8 +1478,8 @@ struct verify_forward_infer_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1711,8 +1710,10 @@ struct verify_forward_train_rnn miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + Workspace wspace{}; + wspace.resize(workSpaceSize); + Workspace reserveSpace{}; + reserveSpace.resize(reserveSpaceSize); auto input_dev = handle.Write(input); @@ -1726,7 +1727,6 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); auto reserveSpace_dev = handle.Write(reserveSpace); std::vector hlens(3, 0); @@ -1756,10 +1756,10 @@ struct verify_forward_train_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + reserveSpace.ptr(), + reserveSpace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2002,14 +2002,15 @@ struct verify_backward_data_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + Workspace wspace{}; + wspace.resize(workSpaceSize); miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); auto dyin_dev = handle.Write(dy); // auto dhyin_dev = handle.Write(dhy); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); @@ -2054,15 +2055,15 @@ struct verify_backward_data_rnn ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2257,8 +2258,11 @@ struct verify_backward_weights_rnn hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wpace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2283,10 +2287,10 @@ struct verify_backward_weights_rnn dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); diff --git a/test/workspace.hpp b/test/workspace.hpp new file mode 100644 index 0000000000..37137918a7 --- /dev/null +++ b/test/workspace.hpp @@ -0,0 +1,149 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include + +#include "get_handle.hpp" + +class Workspace { + + // RAII class for hip allocations + class GPUBuffer { + public: + GPUBuffer() = default; + + explicit GPUBuffer(size_t num_bytes) { + assert(num_bytes > 0); + auto s = hipMalloc(&buf_, num_bytes); + if (s != hipSuccess || !buf_) { + std::abort(); + } + } + + ~GPUBuffer() { + auto s = hipFree(buf_); + buf_ = nullptr; + if (s != hipSuccess) { + std::abort(); + } + } + + void* ptr() { return buf_; } + void* ptr() const { return buf_; } + + GPUBuffer(const GPUBuffer&) = delete; + GPUBuffer& operator = (const GPUBuffer&) = delete; + + GPUBuffer(GPUBuffer&& that): + buf_(std::move(that.buf_)) { + that.buf_ = nullptr; // take over ownership + } + + GPUBuffer& operator = (GPUBuffer&& that) { + std::swap(this->buf_, that.buf_); + return *this; + } + + private: + void* buf_ = nullptr; + }; + +public: + Workspace() = default; + + Workspace(const Workspace&) = delete; + Workspace& operator = (const Workspace&) = delete; + Workspace(Workspace&&) = default; + Workspace& operator = (Workspace&&) = default; + + size_t size() const { return sz_; } + + void resize(size_t sz_in_bytes) { + sz_ = sz_in_bytes; + AdjustToSize(); + } + + + // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is + // passed to capture the size. Must call AdjustToSize() after calling such a method + size_t* SizePtr() { return &sz_; } + +#if 0 + auto ptr() const { return data_.get(); } + auto ptr() { return data_.get(); } + void AdjustToSize() { + if (sz_ == 0) { + data_.reset(); + } else { + data_ = handle_.Create(sz_); + } + } +#else + auto ptr() const { return gpu_buf_.ptr(); } + + auto ptr() { return gpu_buf_.ptr(); } + + void AdjustToSize() { + if (sz_ != 0) { + gpu_buf_ = GPUBuffer(sz_); + } else { + gpu_buf_ = GPUBuffer(); + } + } +#endif + + template + void Write(const V& vec) { + using T = typename V::value_type; + auto bytes = vec.size() * sizeof(T); + resize(bytes); + auto s = hipMemcpyHostToDevice(this->ptr(), &vec[0], bytes); + if(s != hipSuccess) { + abort(); + } + } + + template + V Read() const { + using T = typename V::value_type; + size_t num_elem = size() / sizeof(T); + V ret(num_elem); + auto s = hipMemcpyDeviceToHost(&ret[0], ptr(), size()); + if (s != hipSuccess) { + abort(); + } + return ret; + } + +private: + // miopen::Handle& handle_; + // miopen::Allocator::ManageDataPtr data_{}; + GPUBuffer gpu_buf_{}; + size_t sz_{}; + + +}; From 4b3307912512433d66417cc28b6b91c13bf22128 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 03:58:42 +0000 Subject: [PATCH 05/14] added an abstraction for workspace --- test/conv_common.hpp | 131 +++++++++++++-------------------- test/find_2_conv.cpp | 2 - test/find_db.cpp | 1 + test/gru_common.hpp | 68 ++++++++++-------- test/lstm_common.hpp | 140 +++++++++++++++++++----------------- test/main.cpp | 108 ++++++---------------------- test/pooling_common.hpp | 4 +- test/reduce_test.cpp | 10 +-- test/rnn_vanilla_common.hpp | 13 ++-- test/tensor_reorder.cpp | 74 +++++-------------- test/workspace.hpp | 9 +-- 11 files changed, 219 insertions(+), 341 deletions(-) diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 58904c2b65..5fd71e2f2a 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -623,8 +623,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - wspace.resize(workspace_size); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvBwdDataAlgorithm(handle, input.desc, @@ -689,8 +688,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - wspace.resize(workspace_size); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvFwdAlgorithm(handle, input.desc, @@ -802,8 +800,7 @@ struct verify_forward_conv : conv_base if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - wspace.resize(workspace_size); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -871,8 +868,7 @@ struct verify_forward_conv : conv_base { if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - wspace.resize(workspace_size); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1080,6 +1076,8 @@ struct verify_backward_conv : conv_base auto wei_dev = handle.Write(weights.data); auto in_dev = handle.Write(rinput.data); + Workspace wspace{}; + miopenConvSolution_t selected; bool fallback_path_taken = false; std::size_t count = 0; @@ -1095,8 +1093,7 @@ struct verify_backward_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - wspace.resize(workspace_size); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1144,18 +1141,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetForwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize( handle, weights.desc, out.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1165,8 +1158,8 @@ struct verify_backward_conv : conv_base out_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -1183,8 +1176,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -1211,18 +1204,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetBackwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize( handle, out.desc, weights.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionBackwardImmediate(handle, @@ -1232,16 +1221,14 @@ struct verify_backward_conv : conv_base wei_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1260,15 +1247,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -1281,8 +1266,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -1296,15 +1281,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -1317,8 +1300,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1448,6 +1431,7 @@ struct verify_backward_weights_conv : conv_base auto out_dev = handle.Write(out.data); auto wei_dev = handle.Write(rweights.data); auto in_dev = handle.Write(input.data); + Workspace wspace{}; miopenConvSolution_t selected; bool fallback_path_taken = false; @@ -1464,9 +1448,7 @@ struct verify_backward_weights_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1484,8 +1466,8 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1512,9 +1494,7 @@ struct verify_backward_weights_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetWrwSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetWrwSolutionWorkspaceSize( handle, filter.mode == miopenTranspose ? input.desc : out.desc, filter.mode == miopenTranspose ? out.desc : input.desc, @@ -1523,11 +1503,9 @@ struct verify_backward_weights_conv : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionWrwImmediate( @@ -1538,16 +1516,14 @@ struct verify_backward_weights_conv : conv_base filter.mode == miopenTranspose ? out_dev.get() : in_dev.get(), rweights.desc, wei_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1564,15 +1540,13 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardWeights( @@ -1586,8 +1560,8 @@ struct verify_backward_weights_conv : conv_base &beta, rweights.desc, wei_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name const auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -1739,9 +1713,8 @@ struct verify_forward_conv_int8 : conv_base wei_vpad_dev.get()); } - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{}; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1758,8 +1731,8 @@ struct verify_forward_conv_int8 : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1787,9 +1760,7 @@ struct verify_forward_conv_int8 : conv_base }); auto selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize(handle, (is_transform ? weight_vpad_desc : weights.desc), (is_transform ? input_vpad_desc : input.desc), @@ -1798,11 +1769,9 @@ struct verify_forward_conv_int8 : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1812,8 +1781,8 @@ struct verify_forward_conv_int8 : conv_base (is_transform ? in_vpad_dev.get() : in_dev.get()), rout.desc, out_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); if(count != 0) diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index 9148436ec7..8ed26211e3 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -312,8 +312,6 @@ struct Find2Test : test_driver { std::cerr << "Running a solution..." << std::endl; - auto& handle_deref = get_handle(); - std::size_t workspace_size; EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), miopenStatusSuccess); diff --git a/test/find_db.cpp b/test/find_db.cpp index 974136bc59..3977f9e220 100644 --- a/test/find_db.cpp +++ b/test/find_db.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include #include diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 51e3c09012..736ff96dfc 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1964,7 +1965,7 @@ struct verify_forward_infer_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t workspace_size = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -1979,9 +1980,9 @@ struct verify_forward_infer_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - std::vector workSpace(workSpaceSize / sizeof(T)); + std::vector workSpace(workspace_size / sizeof(T)); auto input_dev = handle.Write(input); @@ -2028,7 +2029,7 @@ struct verify_forward_infer_gru &hiddenDesc, nullptr, workSpace_dev.get(), - workSpaceSize); + workspace_size); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2249,7 +2250,7 @@ struct verify_forward_train_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; std::vector inputCPPDescs; @@ -2265,11 +2266,11 @@ struct verify_forward_train_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + std::vector workSpace(workspace_size / sizeof(T)); std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); auto input_dev = handle.Write(input); @@ -2319,7 +2320,7 @@ struct verify_forward_train_gru &hiddenDesc, nullptr, workSpace_dev.get(), - workSpaceSize, + workspace_size, reserveSpace_dev.get(), reserveSpaceSize); @@ -2484,7 +2485,7 @@ struct verify_backward_data_gru int bi = dirMode != 0 ? 2 : 1; int hy_h = hiddenSize; int bi_stride = bi * hy_h; - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -2494,8 +2495,8 @@ struct verify_backward_data_gru // Outputs ---------- size_t in_sz = 0; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, inputDescs.data(), &in_sz); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz / sizeof(T)); std::vector dhx(initHidden.size()); @@ -2563,7 +2564,6 @@ struct verify_backward_data_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -2578,16 +2578,19 @@ struct verify_backward_data_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{}; + wspace.resize(workspace_size); miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); auto weights_dev = handle.Write(weights); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2633,15 +2636,15 @@ struct verify_backward_data_gru ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2840,8 +2843,11 @@ struct verify_backward_weights_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wspace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2869,10 +2875,10 @@ struct verify_backward_weights_gru dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -3123,10 +3129,10 @@ struct gru_basic_driver : test_driver size_t reserveSpaceSize; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + 2 * out_sz + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + 2 * out_sz + (in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz)) * sizeof(T); diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 1f0d5f98cb..29f132c1e7 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -27,6 +27,7 @@ #ifndef GUARD_MIOPEN_TEST_LSTM_COMMON_HPP #define GUARD_MIOPEN_TEST_LSTM_COMMON_HPP +#include "workspace.hpp" #include "driver.hpp" #include "dropout_util.hpp" #include "get_handle.hpp" @@ -96,6 +97,8 @@ struct verify_backward_data_lstm std::vector initHidden; // HX std::vector initCell; // CX std::vector weights; + std::vector& RSVgpu; + std::vector& RSVcpu; std::vector batch_seq; int hiddenSize; int seqLength; @@ -115,8 +118,6 @@ struct verify_backward_data_lstm bool nodcx; bool use_dropout; bool use_seqPadding; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_backward_data_lstm(miopenRNNDescriptor_t pRD, const std::vector& py, @@ -153,6 +154,8 @@ struct verify_backward_data_lstm initHidden(phx), initCell(pcx), weights(pW), + RSVgpu(pRSVgpu), + RSVcpu(pRSVcpu), batch_seq(pBS), hiddenSize(pHS), seqLength(pS), @@ -171,9 +174,7 @@ struct verify_backward_data_lstm nodhx(pnodhx), nodcx(pnodcx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - RSVgpu(pRSVgpu.begin()), - RSVcpu(pRSVcpu.begin()) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -248,6 +249,8 @@ struct verify_backward_weights_lstm std::vector input; // Y std::vector dy; // dY std::vector initHidden; // HX + std::vector reserveSpace_gpu; + std::vector reserveSpace_cpu; std::vector workSpace; std::vector batch_seq; int weightSize; @@ -264,8 +267,6 @@ struct verify_backward_weights_lstm bool nohx; bool use_dropout; bool use_seqPadding; - typename std::vector reserveSpace_gpu; - typename std::vector reserveSpace_cpu; verify_backward_weights_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -291,6 +292,8 @@ struct verify_backward_weights_lstm : input(px), dy(pdy), initHidden(phx), + reserveSpace_gpu(pRSVgpu), + reserveSpace_cpu(pRSVcpu), workSpace(pWS), batch_seq(pBS), weightSize(pW), @@ -306,9 +309,7 @@ struct verify_backward_weights_lstm realHiddenSize(pHXZ), nohx(pnohx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - reserveSpace_gpu(pRSVgpu), - reserveSpace_cpu(pRSVcpu) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -534,7 +535,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -549,9 +549,11 @@ struct verify_forward_infer_lstm : verify_forward_lstm hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{}; + wspace.resize(workspace_size); auto input_dev = handle.Write(input); @@ -565,8 +567,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto cy = initCell; std::fill(cy.begin(), cy.end(), 0.); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -577,6 +577,9 @@ struct verify_forward_infer_lstm : verify_forward_lstm wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); + /// \todo: fix the handle.Write() calls below because they generate + /// temporary objects that may get destroyed before the + /// miopenRNNForwardInference call happens miopenRNNForwardInference(&handle, rnnDesc, seqLength, @@ -594,8 +597,8 @@ struct verify_forward_infer_lstm : verify_forward_lstm ((nohy) ? nullptr : handle.Write(hy).get()), &hiddenDesc, ((nocy) ? nullptr : handle.Write(cy).get()), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -673,9 +676,10 @@ struct verify_forward_train_lstm : verify_forward_lstm using verify_forward_lstm::nocy; using verify_forward_lstm::use_seqPadding; + std::vector& RSVgpu; + std::vector& RSVcpu; + bool use_dropout; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_forward_train_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -700,7 +704,7 @@ struct verify_forward_train_lstm : verify_forward_lstm const bool pnocy = false, const bool puse_dropout = false, const bool puse_seqPadding = false) - : RSVgpu(pRSVgpu.begin()), RSVcpu(pRSVcpu.begin()) + : RSVgpu(pRSVgpu), RSVcpu(pRSVcpu) { input = px; initHidden = phx; @@ -861,7 +865,10 @@ struct verify_forward_train_lstm : verify_forward_lstm ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + if (reserveSpace.size() != RSVcpu.size()) { + std::abort(); + } + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); auto retSet = std::make_tuple( output, (nohy ? initHidden : hiddenState), (nocy ? initCell : cellState)); @@ -908,17 +915,16 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(output.begin(), output.end(), static_cast(0)); auto output_dev = handle.Write(output); - size_t workSpaceSize = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + Workspace rspace{}; + wspace.resize(workspace_size); + rspace.resize(reserveSpaceSize); auto weights_dev = handle.Write(weights); @@ -957,10 +963,10 @@ struct verify_forward_train_lstm : verify_forward_lstm ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, ((nocy) ? nullptr : cy_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -969,9 +975,7 @@ struct verify_forward_train_lstm : verify_forward_lstm printf("GPU outdata[%d]: %f\n", i, outdata[i]); } #endif - reserveSpace = - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + RSVgpu = rspace.Read>(); std::vector output_gpu = handle.Read(output_dev, output.size()); @@ -1049,7 +1053,7 @@ verify_backward_data_lstm::cpu() const int hy_h = hiddenSize; int bi_stride = bi * hy_h; int out_h = hiddenSize * ((dirMode != 0) ? 2 : 1); - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -1066,8 +1070,8 @@ verify_backward_data_lstm::cpu() const true, use_seqPadding); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz); std::vector dhx(initHidden.size()); std::vector dcx(initHidden.size()); @@ -1086,8 +1090,7 @@ verify_backward_data_lstm::cpu() const reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T); } - std::vector reserveSpace(reserveSpaceSize); - std::copy(RSVcpu, RSVcpu + reserveSpaceSize, reserveSpace.begin()); + std::vector reserveSpace(RSVcpu.begin(), RSVcpu.begin() + reserveSpaceSize); std::vector converted_dinput; std::vector converted_output; @@ -1119,7 +1122,7 @@ verify_backward_data_lstm::cpu() const packed_doutput = &converted_doutput; // WA - wa_workSpace.resize(workSpaceSize / sizeof(T) - (packedXInSize + packedYOutSize)); + wa_workSpace.resize(workspace_size / sizeof(T) - (packedXInSize + packedYOutSize)); wa_shifted_workSpace = &wa_workSpace; } else @@ -1186,7 +1189,7 @@ verify_backward_data_lstm::cpu() const workSpace.begin() + converted_doutput.size() + converted_dinput.size()); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); // TODO: remove workSpace auto retSet = @@ -1210,7 +1213,6 @@ verify_backward_data_lstm::gpu() const auto&& handle = get_handle(); - size_t workSpaceSize = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -1225,19 +1227,22 @@ verify_backward_data_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{}; + wspace.resize(workspace_size); - size_t reserveSpaceSize; + size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(RSVgpu, RSVgpu + reserveSpace.size(), reserveSpace.begin()); + if (reserveSpaceSize != RSVgpu.size()) { + std::abort(); + } + Workspace rspace{}; + rspace.Write(RSVgpu); auto yin_dev = handle.Write(yin); auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); auto weights_dev = handle.Write(weights); std::vector hlens(3, 0); @@ -1284,18 +1289,17 @@ verify_backward_data_lstm::gpu() const ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, ((nodcx) ? nullptr : dcx_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); - reserveSpace = handle.Read(reserveSpace_dev, reserveSpace.size()); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + RSVgpu = rspace.Read>(); // TODO: remove workSpace auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), (nodcx ? initCell : handle.Read(dcx_dev, dcx.size())), - handle.Read(workSpace_dev, workSpace.size())); + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1427,8 +1431,12 @@ std::vector verify_backward_weights_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace_gpu); + Workspace wspace{}; + wspace.Write(workSpace); + + Workspace rspace{}; + rspace.Write(reserveSpace_gpu); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -1452,10 +1460,10 @@ std::vector verify_backward_weights_lstm::gpu() const dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace_gpu.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1732,10 +1740,10 @@ struct lstm_basic_driver : test_driver false, usePadding); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + (2 * out_sz + in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz) + (nocx ? 0 : hx_sz) + (nocy ? 0 : hx_sz) + (nodcx ? 0 : hx_sz) + (nodcy ? 0 : hx_sz)) * diff --git a/test/main.cpp b/test/main.cpp index 8d7f515f22..7a5d0cd5b0 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -23,8 +23,10 @@ * SOFTWARE. * *******************************************************************************/ +#include "get_handle.hpp" #include "test.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -41,16 +43,10 @@ struct handle_fixture { miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif handle_fixture() { miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif } ~handle_fixture() { miopenDestroy(handle); } @@ -171,7 +167,6 @@ struct conv_forward : output_tensor_fixture { float alpha = 1, beta = 0; - // Setup OpenCL buffers int n, h, c, w; STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w)); @@ -186,15 +181,13 @@ struct conv_forward : output_tensor_fixture size_t sz_fwd_workspace; STATUS(miopenConvolutionForwardGetWorkSpaceSize( handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace)); - // OCL fails to allocate zero workspace. Let's allocate small workspace instead to simplify - // subsequent code. - if(sz_fwd_workspace == 0) - sz_fwd_workspace = 256; + + Workspace wspace{}; + wspace.resize(sz_fwd_workspace); std::vector in(sz_in); std::vector wei(sz_wei); std::vector out(sz_out); - std::vector fwd_workspace(sz_fwd_workspace / 4); for(size_t i = 0; i < sz_in; i++) { @@ -205,60 +198,16 @@ struct conv_forward : output_tensor_fixture wei[i] = prng::gen_A_to_B(-0.5f, 0.5f) * 0.001f; } -#if MIOPEN_BACKEND_OPENCL - - cl_context ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, nullptr); - - cl_int status = CL_SUCCESS; - cl_mem in_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_in, nullptr, &status); - cl_mem wei_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_wei, nullptr, nullptr); - cl_mem out_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 4 * sz_out, nullptr, nullptr); - cl_mem fwd_workspace_dev = - clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz_fwd_workspace, nullptr, nullptr); - - status = - clEnqueueWriteBuffer(q, in_dev, CL_TRUE, 0, 4 * sz_in, in.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, wei_dev, CL_TRUE, 0, 4 * sz_wei, wei.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, out_dev, CL_TRUE, 0, 4 * sz_out, out.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - fwd_workspace_dev, - CL_TRUE, - 0, - sz_fwd_workspace, - fwd_workspace.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); - -#elif MIOPEN_BACKEND_HIP - - void* in_dev; - void* wei_dev; - void* out_dev; - void* fwd_workspace_dev; - - EXPECT(hipMalloc(&in_dev, 4 * sz_in) == hipSuccess); - EXPECT(hipMalloc(&wei_dev, 4 * sz_wei) == hipSuccess); - EXPECT(hipMalloc(&out_dev, 4 * sz_out) == hipSuccess); - EXPECT(hipMalloc(&fwd_workspace_dev, sz_fwd_workspace) == hipSuccess); - - EXPECT(hipMemcpy(in_dev, in.data(), 4 * sz_in, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(wei_dev, wei.data(), 4 * sz_wei, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(out_dev, out.data(), 4 * sz_out, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(fwd_workspace_dev, - fwd_workspace.data(), - sz_fwd_workspace, - hipMemcpyHostToDevice) == hipSuccess); + auto& mhand = get_handle(); + + auto in_dev = mhand.Write(in); + auto wei_dev = mhand.Write(wei); + auto out_dev = mhand.Write(out); -#endif int value = 10; - STATUS(miopenSetTensor(handle, inputTensor, in_dev, &value)); + STATUS(miopenSetTensor(handle, inputTensor, in_dev.get(), &value)); - STATUS(miopenScaleTensor(handle, inputTensor, in_dev, &alpha)); + STATUS(miopenScaleTensor(handle, inputTensor, in_dev.get(), &alpha)); float time; @@ -280,32 +229,32 @@ struct conv_forward : output_tensor_fixture STATUS(miopenFindConvolutionForwardAlgorithm( used_handle, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, outputTensor, - out_dev, + out_dev.get(), 1, &ret_algo_count, &perf, - fwd_workspace_dev, - sz_fwd_workspace, + wspace.ptr(), + wspace.size(), 0)); // MD: Not performing exhaustiveSearch by default for now STATUS(miopenConvolutionForward(used_handle, &alpha, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, perf.fwd_algo, &beta, outputTensor, - out_dev, - fwd_workspace_dev, - sz_fwd_workspace)); + out_dev.get(), + wspace.ptr(), + wspace.size())); STATUS(miopenGetKernelTime(used_handle, &time)); @@ -323,19 +272,6 @@ struct conv_forward : output_tensor_fixture CHECK(time == 0.0); } -// Potential memory leak free memory at end of function -#if MIOPEN_BACKEND_OPENCL - clReleaseMemObject(in_dev); - clReleaseMemObject(wei_dev); - clReleaseMemObject(out_dev); - clReleaseMemObject(fwd_workspace_dev); - -#elif MIOPEN_BACKEND_HIP - hipFree(in_dev); - hipFree(wei_dev); - hipFree(out_dev); - hipFree(fwd_workspace_dev); -#endif } }; diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index f5e60f5163..a98d9fb82d 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -217,7 +217,7 @@ struct verify_forward_pooling wspace.ptr(), wspace.size()); - indices = wspace.Read(); + indices = wspace.Read>(); out.data = handle.Read(out_dev, out.data.size()); return out; } @@ -424,7 +424,7 @@ struct verify_backward_pooling // dx dinput.desc, din_dev.get(), - wspace.prt()); + wspace.ptr()); dinput.data = handle.Read(din_dev, dinput.data.size()); return dinput; diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index fd4e48ce4f..a48d1fc9b8 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -77,7 +77,7 @@ struct verify_reduce_with_indices indices(indices_), alpha(alpha_), beta(beta_), - reduceOp(reduce.reduceTensorOp_) + reduceOp(reduce.reduceTensorOp_), compTypeVal(reduce.reduceTensorCompType_), nanOpt (reduce.reduceTensorNanOpt_), indicesOpt (reduce.reduceTensorIndices_), @@ -356,11 +356,11 @@ struct verify_reduce_with_indices ? static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, + idxspace.ptr(), + idxspace.size(), wspace.ptr(), wspace.size(), alphaPtr, @@ -652,7 +652,7 @@ struct verify_reduce_no_indices ? static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { reduce.ReduceTensor(get_handle(), nullptr, diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index 4d8f6ff1d0..9a96cfb106 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -1712,8 +1712,8 @@ struct verify_forward_train_rnn Workspace wspace{}; wspace.resize(workSpaceSize); - Workspace reserveSpace{}; - reserveSpace.resize(reserveSpaceSize); + Workspace rspace{}; + rspace.resize(reserveSpaceSize); auto input_dev = handle.Write(input); @@ -1727,7 +1727,6 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto reserveSpace_dev = handle.Write(reserveSpace); std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); @@ -1758,8 +1757,8 @@ struct verify_forward_train_rnn nullptr, wspace.ptr(), wspace.size(), - reserveSpace.ptr(), - reserveSpace.size()); + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1772,7 +1771,7 @@ struct verify_forward_train_rnn auto retSet = std::make_tuple( handle.Read(output_dev, output.size()), (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2259,7 +2258,7 @@ struct verify_backward_weights_rnn miopen::deref(rnnDesc).dataType); Workspace wspace{}; - wpace.Write(workSpace); + wspace.Write(workSpace); Workspace rspace{}; rspace.Write(reserveSpace); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index fcf3420320..c2ce8a94a9 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -38,6 +38,8 @@ #include "test.hpp" #include "driver.hpp" #include "random.hpp" +#include "get_handle.hpp" +#include "workspace.hpp" template <> struct miopen_type : std::integral_constant @@ -288,19 +290,6 @@ bool verify_tensor(tensor& t_gpu, tensor& t_cpu) struct tensor_reorder_base_driver : test_driver { - miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif - - tensor_reorder_base_driver() - { - miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif - } - ~tensor_reorder_base_driver() { miopenDestroy(handle); } static std::vector get_dim_3_size() { return {1, 9}; } static std::vector get_dim_2_size() { return {1, 9}; } @@ -365,7 +354,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver // NOLINTBEGIN(clang-analyzer-cplusplus.NewDeleteLeaks) void run() { - auto run_reorder = [this](uint32_t dim_0, + auto run_reorder = [](uint32_t dim_0, uint32_t dim_1, uint32_t dim_2, uint32_t dim_3, @@ -394,8 +383,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver tensor t_dst_gpu(tensor_len, tensor_strides); rand_tensor_integer(t_src); + auto& handle = get_handle(); miopen::ExecutionContext ctx; - ctx.SetStream(&miopen::deref(this->handle)); + ctx.SetStream(&handle); // ctx.SetupFloats(); auto reorder_sol = MakeTensorReorderAttributes(ctx, to_miopen_data_type::get(), @@ -408,36 +398,15 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); EXPECT(reorder_sol != nullptr); - size_t workspace = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz + size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz : reorder_sol->GetOutputTensorSize(); -#if MIOPEN_BACKEND_OPENCL - cl_context cl_ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &cl_ctx, nullptr); - cl_int status = CL_SUCCESS; - cl_mem src_dev = - clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, sizeof(T) * tensor_sz, nullptr, &status); - cl_mem dst_dev = clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, workspace, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - src_dev, - CL_TRUE, - 0, - sizeof(T) * tensor_sz, - t_src.data.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); -#elif MIOPEN_BACKEND_HIP - void* src_dev; - void* dst_dev; - EXPECT(hipMalloc(&src_dev, sizeof(T) * tensor_sz) == hipSuccess); - EXPECT(hipMalloc(&dst_dev, workspace) == hipSuccess); - EXPECT(hipMemcpy( - src_dev, t_src.data.data(), sizeof(T) * tensor_sz, hipMemcpyHostToDevice) == - hipSuccess); -#endif + Workspace wspace{}; + wspace.resize(workspace_size); + + auto src_dev = handle.Write(t_src.data); + const auto invoke_param = reorder_invoke_param{ - DataCast(static_cast(src_dev)), DataCast(dst_dev)}; + src_dev.get(), wspace.ptr()}; std::vector opArgs = reorder_sol->GetKernelArg(); boost::optional invoker_factory( [=](const std::vector& kernels) mutable { @@ -454,9 +423,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver std::vector construction_params{ reorder_sol->GetKernelInfo()}; const auto invoker = - miopen::deref(this->handle).PrepareInvoker(*invoker_factory, construction_params); + handle.PrepareInvoker(*invoker_factory, construction_params); // run gpu - invoker(miopen::deref(this->handle), invoke_param); + invoker(handle, invoke_param); // run cpu cpu_reorder::run(t_dst.data.data(), t_src.data.data(), @@ -469,18 +438,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); invoker_factory = boost::none; -#if MIOPEN_BACKEND_OPENCL - status = clEnqueueReadBuffer( - q, dst_dev, CL_TRUE, 0, workspace, t_dst_gpu.data.data(), 0, nullptr, nullptr); - EXPECT(status == CL_SUCCESS); - clReleaseMemObject(dst_dev); - clReleaseMemObject(src_dev); -#elif MIOPEN_BACKEND_HIP - EXPECT(hipMemcpy(t_dst_gpu.data.data(), dst_dev, workspace, hipMemcpyDeviceToHost) == - hipSuccess); - hipFree(dst_dev); - hipFree(src_dev); -#endif + + t_dst_gpu.data = wspace.Read(); + // we expect excact match, since use integer bool valid_result = verify_tensor(t_dst_gpu, t_dst); std::cout << "[" << reorder_str::get(order_0, order_1, order_2, order_3) << ", b" diff --git a/test/workspace.hpp b/test/workspace.hpp index 37137918a7..adf62a95ed 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -119,9 +119,9 @@ class Workspace { template void Write(const V& vec) { using T = typename V::value_type; - auto bytes = vec.size() * sizeof(T); - resize(bytes); - auto s = hipMemcpyHostToDevice(this->ptr(), &vec[0], bytes); + resize(vec.size() * sizeof(T)); + auto s = hipMemcpy(this->ptr(), &vec[0], size(), + hipMemcpyHostToDevice); if(s != hipSuccess) { abort(); } @@ -132,7 +132,8 @@ class Workspace { using T = typename V::value_type; size_t num_elem = size() / sizeof(T); V ret(num_elem); - auto s = hipMemcpyDeviceToHost(&ret[0], ptr(), size()); + auto s = hipMemcpy(&ret[0], ptr(), size(), + hipMemcpyDeviceToHost); if (s != hipSuccess) { abort(); } From cdb9325b0d3c7aeac2c719cf980a3d94f0c0ebd3 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 14:57:16 +0000 Subject: [PATCH 06/14] fix a check --- test/lstm_common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 29f132c1e7..78c639e5d3 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -1235,7 +1235,7 @@ verify_backward_data_lstm::gpu() const size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - if (reserveSpaceSize != RSVgpu.size()) { + if (reserveSpaceSize != (RSVgpu.size() * sizeof(T))) { std::abort(); } Workspace rspace{}; From 71f709b0c71c36968495779b7c0c8150958e0eba Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 14:57:43 +0000 Subject: [PATCH 07/14] fix format --- test/conv_common.hpp | 8 +- test/find_2_conv.cpp | 13 +- test/gru_common.hpp | 12 +- test/gtest/conv_api_strided_tensors.cpp | 6 +- test/gtest/solver_bwd.hpp | 7 +- test/gtest/solver_fwd.hpp | 7 +- test/gtest/solver_wrw.hpp | 7 +- test/lstm_common.hpp | 21 +-- test/main.cpp | 9 +- test/pooling_common.hpp | 6 +- test/reduce_test.cpp | 40 +++-- test/rnn_vanilla_common.hpp | 10 +- test/tensor_reorder.cpp | 22 ++- test/workspace.hpp | 195 +++++++++++++----------- 14 files changed, 177 insertions(+), 186 deletions(-) diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 5fd71e2f2a..c2665c0072 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -324,9 +324,9 @@ struct conv_base Workspace wspace{}; wspace.resize(workspace_size); - EXPECT_EQUAL(miopenStatusSuccess, - miopenRunSolution( - handle, solution, 3, arguments, wspace.ptr(), wspace.size())); + EXPECT_EQUAL( + miopenStatusSuccess, + miopenRunSolution(handle, solution, 3, arguments, wspace.ptr(), wspace.size())); } const auto& solution_deref = miopen::deref(solutions.front()); @@ -1211,7 +1211,7 @@ struct verify_backward_conv : conv_base if(selected.workspace_size > 0) { - wspace.resize(selected.workspace_size); + wspace.resize(selected.workspace_size); } filter.ConvolutionBackwardImmediate(handle, diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index 8ed26211e3..4e1433b1e5 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -214,9 +214,9 @@ struct Find2Test : test_driver Workspace wspace{}; wspace.resize(workspace_size); - EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace( - options, wspace.ptr(), wspace.size()), - miopenStatusSuccess); + EXPECT_EQUAL( + miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()), + miopenStatusSuccess); EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor( options, miopenTensorConvolutionX, x_dev.get()), @@ -329,10 +329,9 @@ struct Find2Test : test_driver arguments[i].buffer = buffers[i]; } - EXPECT_EQUAL( - miopenRunSolution( - handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()), - miopenStatusSuccess); + EXPECT_EQUAL(miopenRunSolution( + handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()), + miopenStatusSuccess); }; // Without descriptors diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 736ff96dfc..89b5d4a18d 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1964,7 +1964,7 @@ struct verify_forward_infer_gru #endif auto&& handle = get_handle(); - size_t out_sz = 0; + size_t out_sz = 0; size_t workspace_size = 0; std::vector inputCPPDescs; @@ -2250,7 +2250,7 @@ struct verify_forward_train_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workspace_size = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; std::vector inputCPPDescs; @@ -2563,7 +2563,7 @@ struct verify_backward_data_gru auto&& handle = get_handle(); - size_t out_sz = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -2584,9 +2584,9 @@ struct verify_backward_data_gru wspace.resize(workspace_size); miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto weights_dev = handle.Write(weights); + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); Workspace rspace{}; rspace.Write(reserveSpace); diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp index 04f3ccbf19..d4e49201f4 100644 --- a/test/gtest/conv_api_strided_tensors.cpp +++ b/test/gtest/conv_api_strided_tensors.cpp @@ -146,9 +146,9 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) { auto device = Device(handle); - auto d_input = device.Malloc(input_bytes); - auto d_filter = device.Malloc(filter_bytes); - auto d_output = device.Malloc(output_bytes); + auto d_input = device.Malloc(input_bytes); + auto d_filter = device.Malloc(filter_bytes); + auto d_output = device.Malloc(output_bytes); std::fill_n(h_input.begin(), h_input.size(), 1.f); ASSERT_TRUE(d_input.CopyToDevice(h_input.data(), input_bytes)); diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp index 988368e3c0..6511800ce6 100644 --- a/test/gtest/solver_bwd.hpp +++ b/test/gtest/solver_bwd.hpp @@ -80,11 +80,8 @@ struct ConvBwdSolverTest wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - wspace.ptr(), - wspace.size(), - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp index a286cc0bbb..20b16fcc32 100644 --- a/test/gtest/solver_fwd.hpp +++ b/test/gtest/solver_fwd.hpp @@ -81,11 +81,8 @@ struct ConvFwdSolverTest wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - wspace.ptr(), - wspace.size(), - this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; // auto sol = solv.GetSolution(ctx, problem); // This is complicated due to the split between tunable and non-tunable solvers diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp index 097c9c6122..dcf8311d83 100644 --- a/test/gtest/solver_wrw.hpp +++ b/test/gtest/solver_wrw.hpp @@ -80,11 +80,8 @@ struct ConvWrwSolverTest wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::WrWInvokeParams{tensors, - wspace.ptr(), - wspace.size(), - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::WrWInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 78c639e5d3..be81761132 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -534,7 +534,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto&& handle = get_handle(); - size_t out_sz = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -865,8 +865,9 @@ struct verify_forward_train_lstm : verify_forward_lstm ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true); } - if (reserveSpace.size() != RSVcpu.size()) { - std::abort(); + if(reserveSpace.size() != RSVcpu.size()) + { + std::abort(); } std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); @@ -915,7 +916,7 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(output.begin(), output.end(), static_cast(0)); auto output_dev = handle.Write(output); - size_t workspace_size = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); miopenGetRNNTrainingReserveSize( @@ -1213,7 +1214,6 @@ verify_backward_data_lstm::gpu() const auto&& handle = get_handle(); - std::vector inputCPPDescs; std::vector inputDescs; createTensorDescArray( @@ -1235,15 +1235,16 @@ verify_backward_data_lstm::gpu() const size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - if (reserveSpaceSize != (RSVgpu.size() * sizeof(T))) { - std::abort(); + if(reserveSpaceSize != (RSVgpu.size() * sizeof(T))) + { + std::abort(); } Workspace rspace{}; rspace.Write(RSVgpu); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto weights_dev = handle.Write(weights); + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); diff --git a/test/main.cpp b/test/main.cpp index 7a5d0cd5b0..aeba6e6d23 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -44,10 +44,7 @@ struct handle_fixture { miopenHandle_t handle{}; - handle_fixture() - { - miopenCreate(&handle); - } + handle_fixture() { miopenCreate(&handle); } ~handle_fixture() { miopenDestroy(handle); } }; @@ -167,7 +164,6 @@ struct conv_forward : output_tensor_fixture { float alpha = 1, beta = 0; - int n, h, c, w; STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w)); size_t sz_in = static_cast(n) * c * h * w; @@ -200,7 +196,7 @@ struct conv_forward : output_tensor_fixture auto& mhand = get_handle(); - auto in_dev = mhand.Write(in); + auto in_dev = mhand.Write(in); auto wei_dev = mhand.Write(wei); auto out_dev = mhand.Write(out); @@ -271,7 +267,6 @@ struct conv_forward : output_tensor_fixture { CHECK(time == 0.0); } - } }; diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index a98d9fb82d..6a05a57b40 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -200,8 +200,8 @@ struct verify_forward_pooling auto out = get_output_tensor(filter, input); indices.resize(out.data.size(), 0); - auto in_dev = handle.Write(input.data); - auto out_dev = handle.Create(out.data.size()); + auto in_dev = handle.Write(input.data); + auto out_dev = handle.Create(out.data.size()); Workspace wspace{}; wspace.Write(indices); @@ -217,7 +217,7 @@ struct verify_forward_pooling wspace.ptr(), wspace.size()); - indices = wspace.Read>(); + indices = wspace.Read>(); out.data = handle.Read(out_dev, out.data.size()); return out; } diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index a48d1fc9b8..4c859d0256 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -61,27 +61,25 @@ struct verify_reduce_with_indices miopenReduceTensorIndices_t indicesOpt; miopenIndicesType_t indicesType; - verify_reduce_with_indices( - const miopen::ReduceTensorDescriptor& reduce_, - const tensor& input_, - const tensor& output_, - const tensor& workspace_, - const tensor& indices_, - float alpha_, - float beta_) - : - reduce(reduce_), - input(input_), - output(output_), - workspace(workspace_), - indices(indices_), - alpha(alpha_), - beta(beta_), - reduceOp(reduce.reduceTensorOp_), - compTypeVal(reduce.reduceTensorCompType_), - nanOpt (reduce.reduceTensorNanOpt_), - indicesOpt (reduce.reduceTensorIndices_), - indicesType(reduce.reduceTensorIndicesType_) + verify_reduce_with_indices(const miopen::ReduceTensorDescriptor& reduce_, + const tensor& input_, + const tensor& output_, + const tensor& workspace_, + const tensor& indices_, + float alpha_, + float beta_) + : reduce(reduce_), + input(input_), + output(output_), + workspace(workspace_), + indices(indices_), + alpha(alpha_), + beta(beta_), + reduceOp(reduce.reduceTensorOp_), + compTypeVal(reduce.reduceTensorCompType_), + nanOpt(reduce.reduceTensorNanOpt_), + indicesOpt(reduce.reduceTensorIndices_), + indicesType(reduce.reduceTensorIndicesType_) { } diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index 9a96cfb106..ce283bddde 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -1727,7 +1727,6 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1768,10 +1767,9 @@ struct verify_forward_train_rnn } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - rspace.Read>()); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2010,7 +2008,7 @@ struct verify_backward_data_rnn // auto dhyin_dev = handle.Write(dhy); Workspace rspace{}; rspace.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); std::vector hlens(3, 0); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index c2ce8a94a9..85210c94b9 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -355,13 +355,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver void run() { auto run_reorder = [](uint32_t dim_0, - uint32_t dim_1, - uint32_t dim_2, - uint32_t dim_3, - uint32_t order_0, - uint32_t order_1, - uint32_t order_2, - uint32_t order_3) { + uint32_t dim_1, + uint32_t dim_2, + uint32_t dim_3, + uint32_t order_0, + uint32_t order_1, + uint32_t order_2, + uint32_t order_3) { int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3; std::vector tensor_len({static_cast(dim_0), static_cast(dim_1), @@ -399,14 +399,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_3); EXPECT(reorder_sol != nullptr); size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz - : reorder_sol->GetOutputTensorSize(); + : reorder_sol->GetOutputTensorSize(); Workspace wspace{}; wspace.resize(workspace_size); auto src_dev = handle.Write(t_src.data); - const auto invoke_param = reorder_invoke_param{ - src_dev.get(), wspace.ptr()}; + const auto invoke_param = reorder_invoke_param{src_dev.get(), wspace.ptr()}; std::vector opArgs = reorder_sol->GetKernelArg(); boost::optional invoker_factory( [=](const std::vector& kernels) mutable { @@ -422,8 +421,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver }); std::vector construction_params{ reorder_sol->GetKernelInfo()}; - const auto invoker = - handle.PrepareInvoker(*invoker_factory, construction_params); + const auto invoker = handle.PrepareInvoker(*invoker_factory, construction_params); // run gpu invoker(handle, invoke_param); // run cpu diff --git a/test/workspace.hpp b/test/workspace.hpp index adf62a95ed..b60db33d7e 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -29,68 +29,75 @@ #include "get_handle.hpp" -class Workspace { - - // RAII class for hip allocations - class GPUBuffer { - public: - GPUBuffer() = default; - - explicit GPUBuffer(size_t num_bytes) { - assert(num_bytes > 0); - auto s = hipMalloc(&buf_, num_bytes); - if (s != hipSuccess || !buf_) { - std::abort(); - } - } - - ~GPUBuffer() { - auto s = hipFree(buf_); - buf_ = nullptr; - if (s != hipSuccess) { - std::abort(); - } - } +class Workspace +{ + + // RAII class for hip allocations + class GPUBuffer + { + public: + GPUBuffer() = default; + + explicit GPUBuffer(size_t num_bytes) + { + assert(num_bytes > 0); + auto s = hipMalloc(&buf_, num_bytes); + if(s != hipSuccess || !buf_) + { + std::abort(); + } + } + + ~GPUBuffer() + { + auto s = hipFree(buf_); + buf_ = nullptr; + if(s != hipSuccess) + { + std::abort(); + } + } + + void* ptr() { return buf_; } + void* ptr() const { return buf_; } + + GPUBuffer(const GPUBuffer&) = delete; + GPUBuffer& operator=(const GPUBuffer&) = delete; + + GPUBuffer(GPUBuffer&& that) : buf_(std::move(that.buf_)) + { + that.buf_ = nullptr; // take over ownership + } + + GPUBuffer& operator=(GPUBuffer&& that) + { + std::swap(this->buf_, that.buf_); + return *this; + } + + private: + void* buf_ = nullptr; + }; - void* ptr() { return buf_; } - void* ptr() const { return buf_; } +public: + Workspace() = default; - GPUBuffer(const GPUBuffer&) = delete; - GPUBuffer& operator = (const GPUBuffer&) = delete; + Workspace(const Workspace&) = delete; + Workspace& operator=(const Workspace&) = delete; + Workspace(Workspace&&) = default; + Workspace& operator=(Workspace&&) = default; - GPUBuffer(GPUBuffer&& that): - buf_(std::move(that.buf_)) { - that.buf_ = nullptr; // take over ownership - } + size_t size() const { return sz_; } - GPUBuffer& operator = (GPUBuffer&& that) { - std::swap(this->buf_, that.buf_); - return *this; + void resize(size_t sz_in_bytes) + { + sz_ = sz_in_bytes; + AdjustToSize(); } - private: - void* buf_ = nullptr; - }; - -public: - Workspace() = default; - - Workspace(const Workspace&) = delete; - Workspace& operator = (const Workspace&) = delete; - Workspace(Workspace&&) = default; - Workspace& operator = (Workspace&&) = default; - - size_t size() const { return sz_; } - - void resize(size_t sz_in_bytes) { - sz_ = sz_in_bytes; - AdjustToSize(); - } - - - // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is - // passed to capture the size. Must call AdjustToSize() after calling such a method - size_t* SizePtr() { return &sz_; } + // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is + // passed to capture the size. Must call AdjustToSize() after calling such a method + size_t* SizePtr() { return &sz_; } #if 0 auto ptr() const { return data_.get(); } @@ -103,48 +110,52 @@ class Workspace { } } #else - auto ptr() const { return gpu_buf_.ptr(); } - - auto ptr() { return gpu_buf_.ptr(); } - - void AdjustToSize() { - if (sz_ != 0) { - gpu_buf_ = GPUBuffer(sz_); - } else { - gpu_buf_ = GPUBuffer(); + auto ptr() const { return gpu_buf_.ptr(); } + + auto ptr() { return gpu_buf_.ptr(); } + + void AdjustToSize() + { + if(sz_ != 0) + { + gpu_buf_ = GPUBuffer(sz_); + } + else + { + gpu_buf_ = GPUBuffer(); + } } - } #endif - template - void Write(const V& vec) { - using T = typename V::value_type; - resize(vec.size() * sizeof(T)); - auto s = hipMemcpy(this->ptr(), &vec[0], size(), - hipMemcpyHostToDevice); - if(s != hipSuccess) { - abort(); + template + void Write(const V& vec) + { + using T = typename V::value_type; + resize(vec.size() * sizeof(T)); + auto s = hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice); + if(s != hipSuccess) + { + abort(); + } } - } - template - V Read() const { - using T = typename V::value_type; - size_t num_elem = size() / sizeof(T); - V ret(num_elem); - auto s = hipMemcpy(&ret[0], ptr(), size(), - hipMemcpyDeviceToHost); - if (s != hipSuccess) { - abort(); + template + V Read() const + { + using T = typename V::value_type; + size_t num_elem = size() / sizeof(T); + V ret(num_elem); + auto s = hipMemcpy(&ret[0], ptr(), size(), hipMemcpyDeviceToHost); + if(s != hipSuccess) + { + abort(); + } + return ret; } - return ret; - } private: - // miopen::Handle& handle_; - // miopen::Allocator::ManageDataPtr data_{}; - GPUBuffer gpu_buf_{}; - size_t sz_{}; - - + // miopen::Handle& handle_; + // miopen::Allocator::ManageDataPtr data_{}; + GPUBuffer gpu_buf_{}; + size_t sz_{}; }; From ae5d50e412dfc99180697283e284e957ae678816 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 16:05:13 +0000 Subject: [PATCH 08/14] missed some instances --- test/gru_common.hpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 89b5d4a18d..4f2072ec8f 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1982,7 +1982,6 @@ struct verify_forward_infer_gru miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - std::vector workSpace(workspace_size / sizeof(T)); auto input_dev = handle.Write(input); @@ -1995,7 +1994,9 @@ struct verify_forward_infer_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); + + Workspace wspace{}; + wspace.resize(workspace_size); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -2028,8 +2029,8 @@ struct verify_forward_infer_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2267,11 +2268,13 @@ struct verify_forward_train_gru miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{}; + wspace.resize(workspace_size); + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - std::vector workSpace(workspace_size / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + Workspace rspace{}; + rspace.resize(reserveSpaceSize); auto input_dev = handle.Write(input); @@ -2285,9 +2288,6 @@ struct verify_forward_train_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2319,10 +2319,10 @@ struct verify_forward_train_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workspace_size, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2335,7 +2335,7 @@ struct verify_forward_train_gru auto retSet = std::make_tuple( handle.Read(output_dev, output.size()), (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); From 83d622b849c16c5d888b4275e95aa2e10e081d4e Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 17:33:15 +0000 Subject: [PATCH 09/14] bring back the workspace round up logic --- test/gru_common.hpp | 6 ++++-- test/lstm_common.hpp | 11 +++++++---- test/rnn_vanilla_common.hpp | 6 ++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 4f2072ec8f..c86fe15e47 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1981,6 +1981,8 @@ struct verify_forward_infer_gru miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{}; + wspace.resize(workspace_size); auto input_dev = handle.Write(input); @@ -1995,8 +1997,6 @@ struct verify_forward_infer_gru auto hy_dev = handle.Write(hy); - Workspace wspace{}; - wspace.resize(workspace_size); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -2273,6 +2273,8 @@ struct verify_forward_train_gru miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1); + assert(reserveSpaceSize % sizeof(T) == 0); Workspace rspace{}; rspace.resize(reserveSpaceSize); diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index be81761132..17217c763d 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -917,14 +917,15 @@ struct verify_forward_train_lstm : verify_forward_lstm auto output_dev = handle.Write(output); size_t workspace_size = 0; - size_t reserveSpaceSize = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{}; + wspace.resize(workspace_size); + + size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - Workspace wspace{}; + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); Workspace rspace{}; - wspace.resize(workspace_size); rspace.resize(reserveSpaceSize); auto weights_dev = handle.Write(weights); @@ -1235,6 +1236,8 @@ verify_backward_data_lstm::gpu() const size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); + if(reserveSpaceSize != (RSVgpu.size() * sizeof(T))) { std::abort(); diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index ce283bddde..9b8d89e908 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -1707,11 +1707,13 @@ struct verify_forward_train_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + Workspace wspace{}; + wspace.resize(workSpaceSize); + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - Workspace wspace{}; - wspace.resize(workSpaceSize); Workspace rspace{}; rspace.resize(reserveSpaceSize); From 4bd42530031b68311c1a65439cccca826c556ef3 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Wed, 15 Nov 2023 17:33:35 +0000 Subject: [PATCH 10/14] formatting --- test/gru_common.hpp | 10 +++------- test/lstm_common.hpp | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/gru_common.hpp b/test/gru_common.hpp index c86fe15e47..56666795fb 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1984,7 +1984,6 @@ struct verify_forward_infer_gru Workspace wspace{}; wspace.resize(workspace_size); - auto input_dev = handle.Write(input); miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); @@ -1996,8 +1995,6 @@ struct verify_forward_infer_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2334,10 +2331,9 @@ struct verify_forward_train_gru } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - rspace.Read>()); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 17217c763d..a319fb3b7a 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -916,7 +916,7 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(output.begin(), output.end(), static_cast(0)); auto output_dev = handle.Write(output); - size_t workspace_size = 0; + size_t workspace_size = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); Workspace wspace{}; wspace.resize(workspace_size); From 6eef5fe3bcbc55a08c009e50a64d0a8181534acc Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Thu, 16 Nov 2023 13:46:57 +0000 Subject: [PATCH 11/14] fix hip tidy issues. add more checks --- test/lstm_common.hpp | 6 +++++- test/workspace.hpp | 16 ++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index a319fb3b7a..75477a4c88 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -1092,7 +1092,10 @@ verify_backward_data_lstm::cpu() const reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T); } - std::vector reserveSpace(RSVcpu.begin(), RSVcpu.begin() + reserveSpaceSize); + if (reserveSpaceSize != RSVcpu.size()) { + std::abort(); + } + std::vector reserveSpace(RSVcpu); std::vector converted_dinput; std::vector converted_output; @@ -1298,6 +1301,7 @@ verify_backward_data_lstm::gpu() const rspace.ptr(), rspace.size()); + assert(RSVgpu.size() * sizeof(T) == rspace.size()); RSVgpu = rspace.Read>(); // TODO: remove workSpace auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), diff --git a/test/workspace.hpp b/test/workspace.hpp index b60db33d7e..33f6656e09 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -42,7 +42,7 @@ class Workspace { assert(num_bytes > 0); auto s = hipMalloc(&buf_, num_bytes); - if(s != hipSuccess || !buf_) + if(s != hipSuccess || buf_ == nullptr) { std::abort(); } @@ -64,7 +64,7 @@ class Workspace GPUBuffer(const GPUBuffer&) = delete; GPUBuffer& operator=(const GPUBuffer&) = delete; - GPUBuffer(GPUBuffer&& that) : buf_(std::move(that.buf_)) + GPUBuffer(GPUBuffer&& that) : buf_(that.buf_) { that.buf_ = nullptr; // take over ownership } @@ -99,17 +99,6 @@ class Workspace // passed to capture the size. Must call AdjustToSize() after calling such a method size_t* SizePtr() { return &sz_; } -#if 0 - auto ptr() const { return data_.get(); } - auto ptr() { return data_.get(); } - void AdjustToSize() { - if (sz_ == 0) { - data_.reset(); - } else { - data_ = handle_.Create(sz_); - } - } -#else auto ptr() const { return gpu_buf_.ptr(); } auto ptr() { return gpu_buf_.ptr(); } @@ -125,7 +114,6 @@ class Workspace gpu_buf_ = GPUBuffer(); } } -#endif template void Write(const V& vec) From f22aa2d0b9d821ed6595984bb99bd350cb6caac8 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Thu, 16 Nov 2023 17:14:53 +0000 Subject: [PATCH 12/14] fix a bug with zeroing out rnn workspace --- test/lstm_common.hpp | 24 ++++++++++++++++++------ test/workspace.hpp | 21 ++++++++++++++++----- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 75477a4c88..bce643726b 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -977,7 +977,7 @@ struct verify_forward_train_lstm : verify_forward_lstm printf("GPU outdata[%d]: %f\n", i, outdata[i]); } #endif - RSVgpu = rspace.Read>(); + rspace.ReadTo(RSVgpu); std::vector output_gpu = handle.Read(output_dev, output.size()); @@ -1092,8 +1092,9 @@ verify_backward_data_lstm::cpu() const reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T); } - if (reserveSpaceSize != RSVcpu.size()) { - std::abort(); + if(reserveSpaceSize != RSVcpu.size()) + { + std::abort(); } std::vector reserveSpace(RSVcpu); @@ -1233,12 +1234,23 @@ verify_backward_data_lstm::gpu() const size_t workspace_size = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + if(workspace_size % sizeof(T) != 0) + { + std::abort(); + } Workspace wspace{}; - wspace.resize(workspace_size); + // Needed to zero out the workspace (happens in std::vector's constructor) + // or else this test fails verification when workspace is compared against the + // workspace returned by ::cpu method in this class + wspace.Write(std::vector(workspace_size / sizeof(T))); + // wspace.resize(workspace_size); size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + /// \todo: fix miopenGetRNNTrainingReserveSize to return a multiple of + /// sizeof(T) + // Needed because reserveSpaceSize returned is not a multiple of sizeof(T). reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); if(reserveSpaceSize != (RSVgpu.size() * sizeof(T))) @@ -1302,7 +1314,7 @@ verify_backward_data_lstm::gpu() const rspace.size()); assert(RSVgpu.size() * sizeof(T) == rspace.size()); - RSVgpu = rspace.Read>(); + rspace.ReadTo(RSVgpu); // TODO: remove workSpace auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), @@ -1831,7 +1843,7 @@ struct lstm_basic_driver : test_driver #if(MIO_LSTM_TEST_DEBUG > 0) printf("Running backward weights LSTM.\n"); printf("reserve sz: %d, workSpace sz: %d, weight sz: %d\n", - reserveSpaceBwdData.size(), + rsvcpu.size(), workSpaceBwdData.size(), wei_sz); fflush(nullptr); diff --git a/test/workspace.hpp b/test/workspace.hpp index 33f6656e09..04ffc2be39 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -128,16 +128,27 @@ class Workspace } template - V Read() const + void ReadTo(V& vec) const { - using T = typename V::value_type; - size_t num_elem = size() / sizeof(T); - V ret(num_elem); - auto s = hipMemcpy(&ret[0], ptr(), size(), hipMemcpyDeviceToHost); + using T = typename V::value_type; + if(vec.size() * sizeof(T) != size()) + { + std::abort(); + } + auto s = hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost); if(s != hipSuccess) { abort(); } + } + + template + V Read() const + { + using T = typename V::value_type; + size_t num_elem = size() / sizeof(T); + V ret(num_elem); + ReadTo(ret); return ret; } From d6ae39fb5b551f3bba9964c43cc24a6db3654640 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Thu, 16 Nov 2023 23:19:50 +0000 Subject: [PATCH 13/14] fix hip-tidy error --- test/workspace.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/workspace.hpp b/test/workspace.hpp index 04ffc2be39..3f9e8aae1b 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -64,12 +64,12 @@ class Workspace GPUBuffer(const GPUBuffer&) = delete; GPUBuffer& operator=(const GPUBuffer&) = delete; - GPUBuffer(GPUBuffer&& that) : buf_(that.buf_) + GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_) { that.buf_ = nullptr; // take over ownership } - GPUBuffer& operator=(GPUBuffer&& that) + GPUBuffer& operator=(GPUBuffer&& that) noexcept { std::swap(this->buf_, that.buf_); return *this; From 98ec151e8c163a553cb6fa4342f919c775e00b30 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Tue, 21 Nov 2023 18:51:15 +0000 Subject: [PATCH 14/14] address review comments --- test/conv_common.hpp | 6 +-- test/ctc.cpp | 3 +- test/find_2_conv.cpp | 6 +-- test/find_db.cpp | 12 ++---- test/gru_common.hpp | 12 ++---- test/lstm_common.hpp | 9 ++-- test/main.cpp | 3 +- test/rnn_vanilla_common.hpp | 12 ++---- test/tensor_reorder.cpp | 3 +- test/workspace.hpp | 85 +++++++++++++++++++------------------ 10 files changed, 65 insertions(+), 86 deletions(-) diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 7f595facb4..e387e98b5b 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -331,8 +331,7 @@ struct conv_base EXPECT_EQUAL(miopenStatusSuccess, miopenGetSolutionWorkspaceSize(solution, &workspace_size)); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; EXPECT_EQUAL( miopenStatusSuccess, @@ -1729,8 +1728,7 @@ struct verify_forward_conv_int8 : conv_base wei_vpad_dev.get()); } - Workspace wspace{}; - wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; int ret_algo_count; miopenConvAlgoPerf_t perf; diff --git a/test/ctc.cpp b/test/ctc.cpp index f53a6f04e3..1c759220f2 100644 --- a/test/ctc.cpp +++ b/test/ctc.cpp @@ -652,8 +652,7 @@ struct verify_ctcloss inputLengths.data(), miopenCTCLossAlgo_t(0)); - Workspace wspace{}; - wspace.resize(workSpaceSize); + Workspace wspace{workSpaceSize}; auto losses_gpu = losses; auto grads_gpu = grads; diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index 4e1433b1e5..6e636e265f 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -211,8 +211,7 @@ struct Find2Test : test_driver } const auto workspace_size = std::min(workspace_limit, workspace_max); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; EXPECT_EQUAL( miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()), @@ -316,8 +315,7 @@ struct Find2Test : test_driver EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), miopenStatusSuccess); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) { auto arguments = std::make_unique(num_arguments); diff --git a/test/find_db.cpp b/test/find_db.cpp index 3977f9e220..f37b6e8595 100644 --- a/test/find_db.cpp +++ b/test/find_db.cpp @@ -107,9 +107,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -140,9 +138,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -173,9 +169,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{ y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 56666795fb..4df6209e9d 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1981,8 +1981,7 @@ struct verify_forward_infer_gru miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -2265,15 +2264,13 @@ struct verify_forward_train_gru miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1); assert(reserveSpaceSize % sizeof(T) == 0); - Workspace rspace{}; - rspace.resize(reserveSpaceSize); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -2578,8 +2575,7 @@ struct verify_backward_data_gru size_t workspace_size = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index bce643726b..569927883e 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -552,8 +552,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm size_t workspace_size = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -918,15 +917,13 @@ struct verify_forward_train_lstm : verify_forward_lstm size_t workspace_size = 0; miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - Workspace rspace{}; - rspace.resize(reserveSpaceSize); + Workspace rspace{reserveSpaceSize}; auto weights_dev = handle.Write(weights); diff --git a/test/main.cpp b/test/main.cpp index 1cd0a402b8..a1efb2eff8 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -174,8 +174,7 @@ struct conv_forward : output_tensor_fixture STATUS(miopenConvolutionForwardGetWorkSpaceSize( handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace)); - Workspace wspace{}; - wspace.resize(sz_fwd_workspace); + Workspace wspace{sz_fwd_workspace}; std::vector in(sz_in); std::vector wei(sz_wei); diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index 9b8d89e908..854f682068 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -1437,8 +1437,7 @@ struct verify_forward_infer_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - Workspace wspace{}; - wspace.resize(workSpaceSize); + Workspace wspace{workSpaceSize}; auto input_dev = handle.Write(input); @@ -1707,15 +1706,13 @@ struct verify_forward_train_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - Workspace wspace{}; - wspace.resize(workSpaceSize); + Workspace wspace{workSpaceSize}; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - Workspace rspace{}; - rspace.resize(reserveSpaceSize); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -2001,8 +1998,7 @@ struct verify_backward_data_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - Workspace wspace{}; - wspace.resize(workSpaceSize); + Workspace wspace{workSpaceSize}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index b4a7208798..bf40e7ee38 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -398,8 +398,7 @@ struct tensor_reorder_driver : tensor_reorder_base_driver EXPECT(reorder_sol != nullptr); size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz : reorder_sol->GetOutputTensorSize(); - Workspace wspace{}; - wspace.resize(workspace_size); + Workspace wspace{workspace_size}; auto src_dev = handle.Write(t_src.data); diff --git a/test/workspace.hpp b/test/workspace.hpp index 3f9e8aae1b..93522b1cd4 100644 --- a/test/workspace.hpp +++ b/test/workspace.hpp @@ -29,6 +29,12 @@ #include "get_handle.hpp" +#define HIP_CHECK(exp) \ + if((exp) != hipSuccess) \ + { \ + MIOPEN_LOG_E(#exp "failed at line: " << __LINE__ << " in file: " << __FILE__); \ + } + class Workspace { @@ -38,49 +44,69 @@ class Workspace public: GPUBuffer() = default; - explicit GPUBuffer(size_t num_bytes) + explicit GPUBuffer(size_t num_bytes) : sz_(num_bytes) { - assert(num_bytes > 0); - auto s = hipMalloc(&buf_, num_bytes); - if(s != hipSuccess || buf_ == nullptr) + if(num_bytes > 0) { - std::abort(); + HIP_CHECK(hipMalloc(&buf_, num_bytes)); + assert(buf_ != nullptr); } - } - - ~GPUBuffer() - { - auto s = hipFree(buf_); - buf_ = nullptr; - if(s != hipSuccess) + else { - std::abort(); + buf_ = nullptr; } } + ~GPUBuffer() { FreeBuf(); } + void* ptr() { return buf_; } void* ptr() const { return buf_; } + auto size() const { return sz_; } + GPUBuffer(const GPUBuffer&) = delete; GPUBuffer& operator=(const GPUBuffer&) = delete; - GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_) + GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_), sz_(that.sz_) { that.buf_ = nullptr; // take over ownership + that.sz_ = 0; } GPUBuffer& operator=(GPUBuffer&& that) noexcept { + FreeBuf(); std::swap(this->buf_, that.buf_); + std::swap(this->sz_, that.sz_); return *this; } private: + void FreeBuf() + { + HIP_CHECK(hipFree(buf_)); + buf_ = nullptr; + sz_ = 0; + } + void* buf_ = nullptr; + size_t sz_ = 0; }; + // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is + // passed to capture the size. Must call AdjustToSize() after calling such a method + size_t* SizePtr() { return &sz_; } + + void AdjustToSize() + { + if(sz_ != gpu_buf_.size()) + { + gpu_buf_ = GPUBuffer(sz_); + } + } + public: - Workspace() = default; + explicit Workspace(size_t sz = 0) : sz_(sz) { AdjustToSize(); } Workspace(const Workspace&) = delete; Workspace& operator=(const Workspace&) = delete; @@ -95,36 +121,16 @@ class Workspace AdjustToSize(); } - // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is - // passed to capture the size. Must call AdjustToSize() after calling such a method - size_t* SizePtr() { return &sz_; } - auto ptr() const { return gpu_buf_.ptr(); } auto ptr() { return gpu_buf_.ptr(); } - void AdjustToSize() - { - if(sz_ != 0) - { - gpu_buf_ = GPUBuffer(sz_); - } - else - { - gpu_buf_ = GPUBuffer(); - } - } - template void Write(const V& vec) { using T = typename V::value_type; resize(vec.size() * sizeof(T)); - auto s = hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice); - if(s != hipSuccess) - { - abort(); - } + HIP_CHECK(hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice)); } template @@ -133,13 +139,10 @@ class Workspace using T = typename V::value_type; if(vec.size() * sizeof(T) != size()) { + MIOPEN_LOG_E("vector of wrong size passed"); std::abort(); } - auto s = hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost); - if(s != hipSuccess) - { - abort(); - } + HIP_CHECK(hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost)); } template