From bef03d674273d049bffba9a538c2b917a80150fb Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Fri, 15 Dec 2023 15:01:11 -0800 Subject: [PATCH 1/4] [HotFix][atamazov] multiple undefined behavior discovered with -fsanitize=undefined in DEV builds (#2609) * fix-issue-2602(01) Fix for smoke_miopendriver_gemm * Do not print output parameters in MIOPEN_LOG_FUNCTION calls. --------- Co-authored-by: atamazov --- driver/gemm_driver.hpp | 10 +++++---- src/activ_api.cpp | 2 +- src/api/find2_0_commons.cpp | 10 ++++----- src/convolution_api.cpp | 35 ++++++++++++++--------------- src/ctc_api.cpp | 12 +++------- src/dropout_api.cpp | 9 +++----- src/fused_api.cpp | 4 ++-- src/include/miopen/gemm_v2.hpp | 2 +- src/lrn_api.cpp | 4 ++-- src/pooling_api.cpp | 19 +++++++--------- src/reducetensor_api.cpp | 11 +++------- src/rnn_api.cpp | 40 +++++++++++++--------------------- src/sum_api.cpp | 2 +- src/tensor_api.cpp | 8 +++---- 14 files changed, 70 insertions(+), 98 deletions(-) diff --git a/driver/gemm_driver.hpp b/driver/gemm_driver.hpp index f464d89270..12b615f405 100644 --- a/driver/gemm_driver.hpp +++ b/driver/gemm_driver.hpp @@ -143,7 +143,8 @@ class GemmDriver : public Driver T alpha, beta; - miopen::GemmDescriptor gemm_desc; + miopen::GemmDescriptor gemm_desc = { + false, false, false, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0f, 0.0f, miopenFloat, false}; }; template @@ -198,13 +199,13 @@ int GemmDriver::GetandSetData() gemm_desc.a_cast_type = data_type; gemm_desc.b_cast_type = data_type; - gemm_desc.isColMajor = inflags.GetValueInt("isColMajor"); + gemm_desc.isColMajor = inflags.GetValueInt("isColMajor") != 0; gemm_desc.m = inflags.GetValueInt("a_h"); gemm_desc.k = inflags.GetValueInt("a_w"); gemm_desc.n = inflags.GetValueInt("b_w"); - gemm_desc.transA = inflags.GetValueInt("transA"); - gemm_desc.transB = inflags.GetValueInt("transB"); + gemm_desc.transA = inflags.GetValueInt("transA") != 0; + gemm_desc.transB = inflags.GetValueInt("transB") != 0; gemm_desc.alpha = inflags.GetValueDouble("alpha"); gemm_desc.beta = inflags.GetValueDouble("beta"); @@ -225,6 +226,7 @@ int GemmDriver::GetandSetData() gemm_desc.strideB = gemm_desc.k * gemm_desc.n; gemm_desc.strideC = gemm_desc.m * gemm_desc.n; + gemm_desc.deterministic = false; return (0); } diff --git a/src/activ_api.cpp b/src/activ_api.cpp index 8fe0373d63..05ccdbe708 100644 --- a/src/activ_api.cpp +++ b/src/activ_api.cpp @@ -60,7 +60,7 @@ extern "C" miopenStatus_t miopenGetActivationDescriptor(miopenActivationDescript double* activGamma) { - MIOPEN_LOG_FUNCTION(activDesc, mode, activAlpha, activBeta, activGamma); + MIOPEN_LOG_FUNCTION(activDesc); return miopen::try_([&] { *mode = miopen::deref(activDesc).GetMode(); *activAlpha = miopen::deref(activDesc).GetAlpha(); diff --git a/src/api/find2_0_commons.cpp b/src/api/find2_0_commons.cpp index 5428edf3a0..ce203197a5 100644 --- a/src/api/find2_0_commons.cpp +++ b/src/api/find2_0_commons.cpp @@ -354,7 +354,7 @@ miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char* data) miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size) { - MIOPEN_LOG_FUNCTION(solution, size); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { if(size == nullptr) @@ -374,7 +374,7 @@ miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size) miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t* workspaceSize) { - MIOPEN_LOG_FUNCTION(solution, workspaceSize); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = 
miopen::deref(solution); @@ -384,7 +384,7 @@ miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t* miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time) { - MIOPEN_LOG_FUNCTION(solution, time); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = miopen::deref(solution); @@ -394,7 +394,7 @@ miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time) miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t* solverId) { - MIOPEN_LOG_FUNCTION(solution, solverId); + MIOPEN_LOG_FUNCTION(solution); return miopen::try_([&] { const auto& solution_deref = miopen::deref(solution); @@ -404,7 +404,7 @@ miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t* so miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId, miopenConvAlgorithm_t* result) { - MIOPEN_LOG_FUNCTION(solverId, result); + MIOPEN_LOG_FUNCTION(solverId); return miopen::try_([&] { const auto id_deref = miopen::solver::Id{solverId}; diff --git a/src/convolution_api.cpp b/src/convolution_api.cpp index 6599bfd787..6929512e05 100644 --- a/src/convolution_api.cpp +++ b/src/convolution_api.cpp @@ -166,7 +166,7 @@ extern "C" miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDes extern "C" miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int* groupCount) { - MIOPEN_LOG_FUNCTION(convDesc, groupCount); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_([&] { miopen::deref(groupCount) = miopen::deref(convDesc).group_count; }); } @@ -254,7 +254,7 @@ extern "C" miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescri int* dilation_h, int* dilation_w) { - MIOPEN_LOG_FUNCTION(convDesc, c_mode, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_([&] { if(miopen::deref(convDesc).GetSpatialDimension() != 2) { @@ -279,8 +279,7 @@ extern "C" miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDesc int* dilationA, miopenConvolutionMode_t* c_mode) { - MIOPEN_LOG_FUNCTION( - convDesc, requestedSpatialDim, spatialDim, padA, strideA, dilationA, c_mode); + MIOPEN_LOG_FUNCTION(convDesc, requestedSpatialDim); return miopen::try_([&] { int spatial_dim = miopen::deref(convDesc).GetSpatialDimension(); if(spatial_dim < requestedSpatialDim) @@ -305,7 +304,7 @@ extern "C" miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDesc extern "C" miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc, int* spatialDim) { - MIOPEN_LOG_FUNCTION(convDesc, spatialDim); + MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_( [&] { miopen::deref(spatialDim) = miopen::deref(convDesc).GetSpatialDimension(); }); } @@ -319,7 +318,7 @@ miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc, int* h, int* w) { - MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc, n, c, h, w); + MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc); return miopen::try_([&] { if(miopen::deref(convDesc).GetSpatialDimension() != 2) { @@ -340,7 +339,7 @@ miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc, int* nDim, int* outputTensorDimA) { - MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc, nDim, outputTensorDimA); + MIOPEN_LOG_FUNCTION(convDesc, inputTensorDesc, filterDesc); return miopen::try_([&] { auto out_desc = miopen::deref(convDesc).GetForwardOutputTensor( miopen::deref(inputTensorDesc), 
miopen::deref(filterDesc)); @@ -369,7 +368,7 @@ miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -690,7 +689,7 @@ miopenConvolutionForwardGetSolutionWorkspaceSize(miopenHandle_t handle, const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -806,7 +805,7 @@ miopenConvolutionBackwardDataGetSolution(miopenHandle_t handle, size_t* solutionCount, miopenConvSolution_t* solutions) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, maxSolutionCount, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, maxSolutionCount); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -829,7 +828,7 @@ miopenConvolutionBackwardDataGetSolutionWorkspaceSize(miopenHandle_t handle, const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -924,7 +923,7 @@ miopenConvolutionBackwardWeightsGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dwDesc, size_t* solutionCount) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -944,7 +943,7 @@ miopenConvolutionBackwardWeightsGetSolution(miopenHandle_t handle, size_t* solutionCount, miopenConvSolution_t* solutions) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, maxSolutionCount, solutionCount); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, maxSolutionCount); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -967,7 +966,7 @@ extern "C" miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionWorkspaceSi const uint64_t solution_id, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solution_id, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solution_id); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) { @@ -1208,8 +1207,7 @@ miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dxDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem = ProblemDescription{}; @@ -1226,8 +1224,7 @@ miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dwDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc); return miopen::try_([&] { auto ctx = ExecutionContext{}; auto problem 
= ProblemDescription{}; @@ -1378,7 +1375,7 @@ extern "C" miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescrip const miopenConvolutionAttrib_t attr, int* const value) { - MIOPEN_LOG_FUNCTION(convDesc, attr, value); + MIOPEN_LOG_FUNCTION(convDesc, attr); return miopen::try_( [&] { miopen::deref(value) = miopen::deref(convDesc).attribute.Get(attr); }); } diff --git a/src/ctc_api.cpp b/src/ctc_api.cpp index 5746912a92..a82e713f00 100644 --- a/src/ctc_api.cpp +++ b/src/ctc_api.cpp @@ -48,7 +48,7 @@ extern "C" miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t c int* blank_label_id = nullptr, bool* apply_softmax_layer = nullptr) { - MIOPEN_LOG_FUNCTION(ctcLossDesc, dataType, blank_label_id, apply_softmax_layer); + MIOPEN_LOG_FUNCTION(ctcLossDesc); return miopen::try_([&] { miopen::deref(dataType) = miopen::deref(ctcLossDesc).dataType; if(blank_label_id != nullptr) @@ -82,14 +82,8 @@ miopenGetCTCLossWorkspaceSize(miopenHandle_t handle, const miopenCTCLossDescriptor_t ctcLossDesc, size_t* workSpaceSize) { - MIOPEN_LOG_FUNCTION(probsDesc, - gradientsDesc, - labels, - labelLengths, - inputLengths, - algo, - ctcLossDesc, - workSpaceSize); + MIOPEN_LOG_FUNCTION( + probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc); return miopen::try_([&] { miopen::deref(workSpaceSize) = miopen::deref(ctcLossDesc) diff --git a/src/dropout_api.cpp b/src/dropout_api.cpp index 6cd17381fd..2b50f7bf80 100644 --- a/src/dropout_api.cpp +++ b/src/dropout_api.cpp @@ -39,7 +39,6 @@ extern "C" miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_ extern "C" miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc) { - MIOPEN_LOG_FUNCTION(dropoutDesc); return miopen::try_([&] { miopen_destroy_object(dropoutDesc); }); } @@ -47,8 +46,7 @@ extern "C" miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor extern "C" miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc, size_t* reserveSpaceSizeInBytes) { - - MIOPEN_LOG_FUNCTION(xDesc, reserveSpaceSizeInBytes); + MIOPEN_LOG_FUNCTION(xDesc); return miopen::try_([&] { miopen::deref(reserveSpaceSizeInBytes) = miopen::deref(xDesc).GetElementSize() * sizeof(bool); @@ -58,8 +56,7 @@ extern "C" miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDes extern "C" miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle, size_t* stateSizeInBytes) { - - MIOPEN_LOG_FUNCTION(stateSizeInBytes); + MIOPEN_LOG_FUNCTION(handle); return miopen::try_([&] { miopen::deref(stateSizeInBytes) = std::min(size_t(MAX_PRNG_STATE), miopen::deref(handle).GetImage3dMaxWidth()) * @@ -76,7 +73,7 @@ extern "C" miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t d bool* state_evo, miopenRNGType_t* rng_mode) { - MIOPEN_LOG_FUNCTION(dropoutDesc, dropout, states, seed, use_mask, state_evo); + MIOPEN_LOG_FUNCTION(dropoutDesc); return miopen::try_([&] { miopen::deref(dropout) = miopen::deref(dropoutDesc).dropout; miopen::deref(states) = &(miopen::deref(dropoutDesc).pstates); diff --git a/src/fused_api.cpp b/src/fused_api.cpp index 0ee0cebc99..c2e2612e3d 100644 --- a/src/fused_api.cpp +++ b/src/fused_api.cpp @@ -87,7 +87,7 @@ miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle, size_t* workSpaceSize, miopenConvFwdAlgorithm_t algo) { - MIOPEN_LOG_FUNCTION(handle, fusePlanDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(handle, fusePlanDesc, algo); miopenStatus_t res = miopenStatusUnknownError; miopen::try_([&] { size_t sz; @@ -103,7 +103,7 @@ 
miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, int* returnedAlgoCount, miopenConvFwdAlgorithm_t* returnedAlgos) { - MIOPEN_LOG_FUNCTION(fusePlanDesc, requestAlgoCount, returnedAlgoCount, returnedAlgos); + MIOPEN_LOG_FUNCTION(fusePlanDesc, requestAlgoCount); miopenStatus_t res = miopenStatusUnknownError; miopen::try_([&] { int cnt = 0; diff --git a/src/include/miopen/gemm_v2.hpp b/src/include/miopen/gemm_v2.hpp index 9300ffa29b..4d7bb8e153 100644 --- a/src/include/miopen/gemm_v2.hpp +++ b/src/include/miopen/gemm_v2.hpp @@ -77,7 +77,7 @@ struct GemmDescriptor miopenDataType_t a_cast_type; miopenDataType_t b_cast_type; ConvolutionAttribute conv_attributes; - GemmDescriptor() {} + GemmDescriptor() = delete; GemmDescriptor(bool isColMajor_, bool transA_, bool transB_, diff --git a/src/lrn_api.cpp b/src/lrn_api.cpp index d922d2c489..fa7f966992 100644 --- a/src/lrn_api.cpp +++ b/src/lrn_api.cpp @@ -56,8 +56,7 @@ extern "C" miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrn double* lrnBeta, double* lrnK) { - - MIOPEN_LOG_FUNCTION(lrnDesc, mode, lrnN, lrnAlpha, lrnBeta, lrnK); + MIOPEN_LOG_FUNCTION(lrnDesc); return miopen::try_([&] { *mode = miopen::deref(lrnDesc).GetMode(); *lrnN = miopen::deref(lrnDesc).GetN(); @@ -70,6 +69,7 @@ extern "C" miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrn extern "C" miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { + MIOPEN_LOG_FUNCTION(yDesc); // TODO: Supporting size 4 bytes only return miopen::try_([&] { diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index a2c792f8bb..321194ffe5 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -137,7 +137,7 @@ extern "C" miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t po extern "C" miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t* index_type) { - MIOPEN_LOG_FUNCTION(poolDesc, index_type); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_([&] { *index_type = miopen::deref(poolDesc).GetIndexType(); }); } @@ -153,7 +153,7 @@ extern "C" miopenStatus_t miopenGetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t* workspace_index) { - MIOPEN_LOG_FUNCTION(poolDesc, workspace_index); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_( [&] { *workspace_index = miopen::deref(poolDesc).GetWorkspaceIndexMode(); }); } @@ -189,8 +189,7 @@ extern "C" miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescri int* stride_w) { - MIOPEN_LOG_FUNCTION( - poolDesc, mode, windowHeight, windowWidth, pad_h, pad_w, stride_h, stride_w); + MIOPEN_LOG_FUNCTION(poolDesc); return miopen::try_([&] { miopen::deref(mode) = miopen::deref(poolDesc).mode; std::tie(miopen::deref(windowHeight), miopen::deref(windowWidth)) = @@ -224,7 +223,7 @@ extern "C" miopenStatus_t miopenGetNdPoolingDescriptor(miopenPoolingDescriptor_t int* padA, int* stridesA) { - + MIOPEN_LOG_FUNCTION(poolDesc, nbDimsRequested); return miopen::try_([&] { if(mode != nullptr) { @@ -262,7 +261,7 @@ miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, int* tensorDimArr) { - MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, dims, tensorDimArr); + MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, dims); return miopen::try_([&] { miopen::deref(poolDesc).GetForwardOutputDimNd( miopen::deref(tensorDesc), dims, tensorDimArr); @@ -278,7 +277,7 @@ miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, int* w) { 
- MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, n, c, h, w); + MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc); return miopen::try_([&] { miopen::tie_deref(n, c, h, w) = miopen::deref(poolDesc).GetForwardOutputDim(miopen::deref(tensorDesc)); @@ -289,8 +288,7 @@ miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, extern "C" miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(yDesc); return miopen::try_([&] { auto len = miopen::deref(yDesc).GetLengths(); size_t sz = std::accumulate(len.begin(), len.end(), size_t{1}, std::multiplies()); @@ -302,8 +300,7 @@ extern "C" miopenStatus_t miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDes const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize) { - - MIOPEN_LOG_FUNCTION(poolDesc, yDesc, workSpaceSize); + MIOPEN_LOG_FUNCTION(poolDesc, yDesc); return miopen::try_( [&] { *workSpaceSize = miopen::deref(poolDesc).GetWorkSpaceSize(miopen::deref(yDesc)); }); } diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 39efcf9eca..fc43a3ee1f 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -131,12 +131,7 @@ miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTenso miopenReduceTensorIndices_t* reduceTensorIndices, miopenIndicesType_t* reduceTensorIndicesType) { - MIOPEN_LOG_FUNCTION(reduceTensorDesc, - reduceTensorOp, - reduceTensorCompType, - reduceTensorNanOpt, - reduceTensorIndices, - reduceTensorIndicesType); + MIOPEN_LOG_FUNCTION(reduceTensorDesc); return miopen::try_([&] { miopen::deref(reduceTensorOp) = miopen::deref(reduceTensorDesc).reduceTensorOp_; miopen::deref(reduceTensorCompType) = miopen::deref(reduceTensorDesc).reduceTensorCompType_; @@ -154,7 +149,7 @@ miopenGetReductionIndicesSize(miopenHandle_t handle, const miopenTensorDescriptor_t cDesc, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = @@ -171,7 +166,7 @@ miopenGetReductionWorkspaceSize(miopenHandle_t handle, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, reduceTensorDesc, aDesc, cDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = miopen::deref(reduceTensorDesc) diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 3f83c16826..4ad183f2bf 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -125,8 +125,7 @@ extern "C" miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, int* layer) { - MIOPEN_LOG_FUNCTION( - rnnDesc, rnnMode, algoMode, inputMode, dirMode, biasMode, hiddenSize, layer); + MIOPEN_LOG_FUNCTION(rnnDesc); return miopen::try_([&] { if(rnnMode != nullptr) { @@ -170,16 +169,7 @@ extern "C" miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDes miopenRNNAlgo_t* algoMode, miopenDataType_t* dataType) { - MIOPEN_LOG_FUNCTION(rnnDesc, - hiddenSize, - layer, - dropoutDesc, - inputMode, - dirMode, - rnnMode, - biasMode, - algoMode, - dataType); + MIOPEN_LOG_FUNCTION(rnnDesc); return miopen::try_([&] { if(rnnMode != nullptr) { @@ -265,7 +255,7 @@ extern "C" miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, 
xDesc); miopen::c_array_view xDescArray{xDesc, size_t(sequenceLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -279,7 +269,7 @@ extern "C" miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle, const miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, sequenceLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(sequenceLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -294,7 +284,7 @@ extern "C" miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle, size_t* workSpaceSize, size_t* reserveSpaceSize) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, workSpaceSize, reserveSpaceSize); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, fwdMode); return miopen::try_([&] { if(workSpaceSize != nullptr) @@ -317,7 +307,7 @@ extern "C" miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle, miopenTensorDescriptor_t wDesc, miopenDataType_t dtype) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, wDesc, dtype); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, dtype); return miopen::try_([&] { miopen::deref(rnnDesc).GetParamsDescriptor( miopen::deref(handle), miopen::deref(xDesc), miopen::deref(wDesc), dtype); @@ -330,7 +320,7 @@ extern "C" miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle, size_t* numBytes, miopenDataType_t dtype) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, numBytes, dtype); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, dtype); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetParamsSize( miopen::deref(handle), miopen::deref(xDesc), dtype); @@ -343,7 +333,7 @@ extern "C" miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle, miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(seqLen)}; return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetRNNInputSuperTensorSize( @@ -357,7 +347,7 @@ extern "C" miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, miopenTensorDescriptor_t* xDesc, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, xDesc, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, seqLen, xDesc); miopen::c_array_view xDescArray{xDesc, size_t(seqLen)}; return miopen::try_([&] { miopen::deref(numBytes) = @@ -372,7 +362,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, const int paramID, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, paramID, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, paramID); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetLayerParamSize( miopen::deref(handle), layer, miopen::deref(xDesc), paramID); @@ -385,7 +375,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, const int biasID, size_t* numBytes) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, biasID, numBytes); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, biasID); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(rnnDesc).GetLayerBiasSize(miopen::deref(handle), layer, biasID); @@ -402,7 +392,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, miopenTensorDescriptor_t paramDesc, void* layerParam) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, paramID, paramDesc, layerParam); + 
MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, paramID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerParam(miopen::deref(handle), layer, @@ -425,7 +415,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, miopenTensorDescriptor_t biasDesc, void* layerBias) { - MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, biasID, biasDesc, layerBias); + MIOPEN_LOG_FUNCTION(handle, rnnDesc, layer, xDesc, wDesc, w, biasID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerBias(miopen::deref(handle), layer, @@ -445,7 +435,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnn miopenTensorDescriptor_t paramDesc, size_t* layerParamOffset) { - MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, paramID, paramDesc, layerParamOffset); + MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, paramID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerParamOffset( layer, miopen::deref(xDesc), paramID, miopen::deref(paramDesc), layerParamOffset); @@ -459,7 +449,7 @@ extern "C" miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnD miopenTensorDescriptor_t biasDesc, size_t* layerBiasOffset) { - MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, biasID, biasDesc, layerBiasOffset); + MIOPEN_LOG_FUNCTION(rnnDesc, layer, xDesc, biasID); return miopen::try_([&] { miopen::deref(rnnDesc).GetLayerBiasOffset( layer, miopen::deref(xDesc), biasID, miopen::deref(biasDesc), layerBiasOffset); diff --git a/src/sum_api.cpp b/src/sum_api.cpp index de3744f306..bac24383a3 100644 --- a/src/sum_api.cpp +++ b/src/sum_api.cpp @@ -89,7 +89,7 @@ extern "C" miopenStatus_t miopenGetSumWorkspaceSize(miopenHandle_t handle, size_t* sizeInBytes) { - MIOPEN_LOG_FUNCTION(handle, xDesc, dim, yDesc, sizeInBytes); + MIOPEN_LOG_FUNCTION(handle, xDesc, dim, yDesc); return miopen::try_([&] { miopen::deref(sizeInBytes) = miopen::GetSumWorkspaceSize( diff --git a/src/tensor_api.cpp b/src/tensor_api.cpp index 307395f59d..9f742a77aa 100644 --- a/src/tensor_api.cpp +++ b/src/tensor_api.cpp @@ -104,7 +104,7 @@ extern "C" miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t t int* wStride) { - MIOPEN_LOG_FUNCTION(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(dataType) = miopen::deref(tensorDesc).GetType(); miopen::tie_deref(n, c, h, w) = miopen::tien<4>(miopen::deref(tensorDesc).GetLengths()); @@ -234,7 +234,7 @@ extern "C" miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tenso size_t* numBytes) { - MIOPEN_LOG_FUNCTION(tensorDesc, numBytes); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(numBytes) = miopen::deref(tensorDesc).GetNumBytes(); }); } @@ -247,7 +247,7 @@ int miopenGetTensorDescriptorElementSize(miopenTensorDescriptor_t tensorDesc) extern "C" miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc, int* size) { - MIOPEN_LOG_FUNCTION(tensorDesc, size); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { miopen::deref(size) = miopen::deref(tensorDesc).GetSize(); }); } @@ -257,7 +257,7 @@ extern "C" miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t ten int* stridesA) { - MIOPEN_LOG_FUNCTION(tensorDesc, dataType, dimsA, stridesA); + MIOPEN_LOG_FUNCTION(tensorDesc); return miopen::try_([&] { if(dataType != nullptr) { From b17d08055137563a0e5b286cf4b587cd6a9a6ec7 Mon Sep 17 00:00:00 2001 From: JD Date: Fri, 15 Dec 2023 19:14:15 -0600 Subject: [PATCH 2/4] 
[hipRTC] resolve symbol issues by explicitly link with hipRTC (#2612) * explicitly link with hipRTC * Update formatting * Consider MIOPEN_USE_HIPRTC=Off * Clean up --------- Co-authored-by: Jun Liu --- CMakeLists.txt | 4 +--- src/CMakeLists.txt | 8 ++++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cdf6829c4e..2fd18fce13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,9 +381,7 @@ if(MIOPEN_USE_HIPRTC) if(NOT MIOPEN_USE_COMGR) message(FATAL_ERROR "HIPRTC can be used only together with COMGR") endif() - if(WIN32) - find_package(hiprtc REQUIRED) - endif() + find_package(hiprtc REQUIRED) message(STATUS "Build with HIPRTC") endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4ed162ae92..1d5548db7e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -761,8 +761,12 @@ if( MIOPEN_BACKEND STREQUAL "OpenCL") elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") target_link_libraries( MIOpen PRIVATE hip::device ) target_link_libraries( MIOpen INTERFACE hip::host ) - if(MIOPEN_USE_HIPRTC AND WIN32) - target_link_libraries( MIOpen PRIVATE hiprtc::hiprtc ) + if(MIOPEN_USE_HIPRTC) + if(WIN32) + target_link_libraries( MIOpen PRIVATE hiprtc::hiprtc ) + else() + target_link_libraries( MIOpen PRIVATE hiprtc) + endif() endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly From 62a05348f511ebd8378ef00596aa2d3d7da80179 Mon Sep 17 00:00:00 2001 From: amberhassaan Date: Sat, 16 Dec 2023 14:34:46 -0500 Subject: [PATCH 3/4] Standardize workspace abstraction (#2524) --- src/ocl/convolutionocl.cpp | 21 +++ test/conv_common.hpp | 220 +++++++++--------------- test/ctc.cpp | 8 +- test/find_2_conv.cpp | 23 +-- test/find_db.cpp | 28 ++- test/gru_common.hpp | 104 +++++------ test/gtest/conv_api_strided_tensors.cpp | 22 +-- test/gtest/solver_bwd.hpp | 14 +- test/gtest/solver_fwd.hpp | 14 +- test/gtest/solver_wrw.hpp | 14 +- test/lstm_common.hpp | 173 +++++++++++-------- test/main.cpp | 114 +++--------- test/pooling_common.hpp | 21 +-- test/reduce_test.cpp | 77 ++++----- test/rnn_vanilla_common.hpp | 69 ++++---- test/tensor_reorder.cpp | 91 +++------- test/workspace.hpp | 163 ++++++++++++++++++ 17 files changed, 595 insertions(+), 581 deletions(-) create mode 100644 test/workspace.hpp diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 73e17380ae..94251cc6d0 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -131,6 +131,18 @@ static inline void ValidateGroupCount(const TensorDescriptor& x, MIOPEN_THROW(miopenStatusBadParm, "Invalid group number"); } +static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize) +{ + + [[maybe_unused]] bool x = (workSpace != nullptr); + [[maybe_unused]] bool y = (workSpaceSize != 0); + + assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. 
Either both should " + "be zero or both should be non-zero"); + + /// \todo could add a check here that workSpace points to GPU memory +} + static Invoker PrepareInvoker(ExecutionContext ctx, const conv::ProblemDescription& problem, const NetworkConfig& config, @@ -260,6 +272,7 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || w == nullptr || y == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -495,6 +508,7 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -812,6 +826,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle, const solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; ValidateTensors(tensors); @@ -846,6 +861,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(dx == nullptr || w == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -944,6 +960,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; @@ -1015,6 +1032,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; ValidateTensors(tensors); @@ -1055,6 +1073,7 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, bool exhaustiveSearch) const { MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); if(x == nullptr || dw == nullptr || dy == nullptr) MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL"); if(returnedAlgoCount == nullptr) @@ -1151,6 +1170,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle, size_t workSpaceSize) const { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); @@ -1218,6 +1238,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, solver::Id solver_id) const { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); + ValidateWorkspace(workSpace, workSpaceSize); auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; ValidateTensors(tensors); diff --git 
a/test/conv_common.hpp b/test/conv_common.hpp index efaf4d8f0a..e387e98b5b 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -25,6 +25,7 @@ *******************************************************************************/ #pragma once #include "test.hpp" +#include "workspace.hpp" #include #include #include @@ -330,13 +331,11 @@ struct conv_base EXPECT_EQUAL(miopenStatusSuccess, miopenGetSolutionWorkspaceSize(solution, &workspace_size)); - const auto workspace_dev = workspace_size != 0 - ? get_handle().Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{workspace_size}; - EXPECT_EQUAL(miopenStatusSuccess, - miopenRunSolution( - handle, solution, 3, arguments, workspace_dev.get(), workspace_size)); + EXPECT_EQUAL( + miopenStatusSuccess, + miopenRunSolution(handle, solution, 3, arguments, wspace.ptr(), wspace.size())); } const auto& solution_deref = miopen::deref(solutions.front()); @@ -596,19 +595,6 @@ struct verify_forward_conv : conv_base return rout; } - void resize_workspace(miopen::Handle& h, - const std::size_t sz, - std::vector& ws, - miopen::Allocator::ManageDataPtr& ws_dev) const - { - ws_dev.reset(); - if(sz > 0) - { - ws.resize(sz); - ws_dev = h.Write(ws); - } - } - tensor gpu() { auto&& handle = get_handle(); @@ -629,8 +615,7 @@ struct verify_forward_conv : conv_base bool fallback_path_taken = false; std::size_t count = 0; - std::vector ws; - miopen::Allocator::ManageDataPtr ws_dev = nullptr; + Workspace wspace{}; const auto ctx = ExecutionContext{&handle}; const auto problem = ConvProblemDescription{ @@ -649,8 +634,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvBwdDataAlgorithm(handle, input.desc, @@ -662,8 +646,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -696,7 +680,7 @@ struct verify_forward_conv : conv_base << " != " << ws_size << std::endl; } } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -707,8 +691,8 @@ struct verify_forward_conv : conv_base wei_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -717,8 +701,7 @@ struct verify_forward_conv : conv_base { int ret_algo_count; miopenConvAlgoPerf_t perf; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - resize_workspace(handle, workspace_size, ws, ws_dev); + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); filter.FindConvFwdAlgorithm(handle, input.desc, @@ -730,8 +713,8 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - ws_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -765,7 +748,7 @@ struct verify_forward_conv : conv_base << " != " << ws_size << std::endl; } } - resize_workspace(handle, selected.workspace_size, ws, ws_dev); + wspace.resize(selected.workspace_size); filter.CompileSolution(ctx, problem, selected.solution_id); @@ -776,8 +759,8 @@ struct verify_forward_conv : conv_base in_dev.get(), rout.desc, out_dev.get(), - ws_dev.get(), - selected.workspace_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } 
break; @@ -832,9 +815,7 @@ struct verify_forward_conv : conv_base if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -851,15 +832,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -872,8 +851,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -904,9 +883,7 @@ struct verify_forward_conv : conv_base { if(api == ConvApi::Find_1_0) { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -925,15 +902,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -946,8 +921,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -961,15 +936,13 @@ struct verify_forward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -982,8 +955,8 @@ struct verify_forward_conv : conv_base &beta, rout.desc, out_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1118,6 +1091,8 @@ struct verify_backward_conv : conv_base auto wei_dev = handle.Write(weights.data); auto in_dev = handle.Write(rinput.data); + Workspace wspace{}; + miopenConvSolution_t selected; bool fallback_path_taken = false; std::size_t count = 0; @@ -1133,9 +1108,7 @@ struct verify_backward_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1154,8 +1127,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -1183,18 +1156,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetForwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize( handle, weights.desc, out.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1204,8 +1173,8 @@ struct verify_backward_conv : conv_base out_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } else @@ -1222,8 +1191,8 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } count = filter.GetSolutionCount(ctx, problem); @@ -1250,18 +1219,14 @@ struct verify_backward_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetBackwardSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize( handle, out.desc, weights.desc, rinput.desc, selected.solution_id); filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionBackwardImmediate(handle, @@ -1271,16 +1236,14 @@ struct verify_backward_conv : conv_base wei_dev.get(), rinput.desc, in_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); } break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1299,15 +1262,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionForward(handle, @@ -1320,8 +1281,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } else { @@ -1335,15 +1296,13 @@ struct verify_backward_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardData(handle, @@ -1356,8 +1315,8 @@ struct verify_backward_conv : conv_base &beta, rinput.desc, in_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); } /// \ref read_solver_name @@ -1487,6 +1446,7 @@ struct verify_backward_weights_conv : conv_base auto out_dev = handle.Write(out.data); auto wei_dev = handle.Write(rweights.data); auto in_dev = handle.Write(input.data); + Workspace wspace{}; miopenConvSolution_t selected; bool fallback_path_taken = false; @@ -1503,9 +1463,7 @@ struct verify_backward_weights_conv : conv_base switch(api) { case ConvApi::Immediate: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1523,8 +1481,8 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1551,9 +1509,7 @@ struct verify_backward_weights_conv : conv_base }); selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = filter.GetWrwSolutionWorkspaceSize( + [[maybe_unused]] std::size_t ws_size = filter.GetWrwSolutionWorkspaceSize( handle, filter.mode == miopenTranspose ? input.desc : out.desc, filter.mode == miopenTranspose ? out.desc : input.desc, @@ -1562,11 +1518,9 @@ struct verify_backward_weights_conv : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionWrwImmediate( @@ -1577,16 +1531,14 @@ struct verify_backward_weights_conv : conv_base filter.mode == miopenTranspose ? out_dev.get() : in_dev.get(), rweights.desc, wei_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); break; } case ConvApi::Find_1_0: { - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + wspace.resize(filter.GetWorkSpaceSize(ctx, problem)); int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1603,15 +1555,13 @@ struct verify_backward_weights_conv : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); - workspace_dev.reset(); if(perf.memory > 0) { - workspace.resize(perf.memory); - workspace_dev = handle.Write(workspace); + wspace.resize(perf.memory); } filter.ConvolutionBackwardWeights( @@ -1625,8 +1575,8 @@ struct verify_backward_weights_conv : conv_base &beta, rweights.desc, wei_dev.get(), - workspace_dev.get(), - workspace_size); + wspace.ptr(), + wspace.size()); /// \ref read_solver_name const auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken); @@ -1778,9 +1728,7 @@ struct verify_forward_conv_int8 : conv_base wei_vpad_dev.get()); } - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - std::vector workspace(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; int ret_algo_count; miopenConvAlgoPerf_t perf; @@ -1797,8 +1745,8 @@ struct verify_forward_conv_int8 : conv_base 1, &ret_algo_count, &perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), search); } @@ -1826,9 +1774,7 @@ struct verify_forward_conv_int8 : conv_base }); auto selected = std::move(solutions.front()); - std::size_t ws_size; - - ws_size = + [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize(handle, (is_transform ? weight_vpad_desc : weights.desc), (is_transform ? input_vpad_desc : input.desc), @@ -1837,11 +1783,9 @@ struct verify_forward_conv_int8 : conv_base filter.CompileSolution(ctx, problem, selected.solution_id); - workspace_dev.reset(); if(selected.workspace_size > 0) { - workspace.resize(selected.workspace_size); - workspace_dev = handle.Write(workspace); + wspace.resize(selected.workspace_size); } filter.ConvolutionForwardImmediate(handle, @@ -1851,8 +1795,8 @@ struct verify_forward_conv_int8 : conv_base (is_transform ? 
in_vpad_dev.get() : in_dev.get()), rout.desc, out_dev.get(), - workspace_dev.get(), - ws_size, + wspace.ptr(), + wspace.size(), selected.solution_id); if(count != 0) diff --git a/test/ctc.cpp b/test/ctc.cpp index d812cef801..1c759220f2 100644 --- a/test/ctc.cpp +++ b/test/ctc.cpp @@ -29,6 +29,7 @@ #include "tensor_holder.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "rnn_util.hpp" #include "random.hpp" #include @@ -651,8 +652,7 @@ struct verify_ctcloss inputLengths.data(), miopenCTCLossAlgo_t(0)); - auto workSpace = tensor{workSpaceSize / sizeof(T)}; - auto workSpace_dev = handle.Write(workSpace.data); + Workspace wspace{workSpaceSize}; auto losses_gpu = losses; auto grads_gpu = grads; @@ -671,8 +671,8 @@ struct verify_ctcloss grads.desc, grads_dev.get(), miopenCTCLossAlgo_t(0), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); losses_gpu.data = handle.Read(losses_dev, losses_gpu.data.size()); grads_gpu.data = handle.Read(grads_dev, grads_gpu.data.size()); diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp index a89ce942a7..6e636e265f 100644 --- a/test/find_2_conv.cpp +++ b/test/find_2_conv.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include @@ -210,13 +211,11 @@ struct Find2Test : test_driver } const auto workspace_size = std::min(workspace_limit, workspace_max); - workspace_dev = workspace_size != 0 - ? miopen::deref(handle).Write(std::vector(workspace_size)) - : nullptr; + Workspace wspace{workspace_size}; - EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace( - options, workspace_dev.get(), workspace_size), - miopenStatusSuccess); + EXPECT_EQUAL( + miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()), + miopenStatusSuccess); EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor( options, miopenTensorConvolutionX, x_dev.get()), @@ -312,14 +311,11 @@ struct Find2Test : test_driver { std::cerr << "Running a solution..." << std::endl; - auto& handle_deref = get_handle(); - std::size_t workspace_size; EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), miopenStatusSuccess); - auto workspace_dev = - workspace_size != 0 ? handle_deref.Write(std::vector(workspace_size)) : nullptr; + Workspace wspace{workspace_size}; const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) { auto arguments = std::make_unique(num_arguments); @@ -331,10 +327,9 @@ struct Find2Test : test_driver arguments[i].buffer = buffers[i]; } - EXPECT_EQUAL( - miopenRunSolution( - handle, solution, 3, arguments.get(), workspace_dev.get(), workspace_size), - miopenStatusSuccess); + EXPECT_EQUAL(miopenRunSolution( + handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()), + miopenStatusSuccess); }; // Without descriptors diff --git a/test/find_db.cpp b/test/find_db.cpp index b07167f80e..47c485182e 100644 --- a/test/find_db.cpp +++ b/test/find_db.cpp @@ -27,6 +27,7 @@ #include "test.hpp" #include "driver.hpp" #include "get_handle.hpp" +#include "workspace.hpp" #include #include @@ -111,10 +112,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? 
handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -130,8 +128,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -145,10 +143,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -164,8 +159,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; @@ -179,10 +174,7 @@ struct FindDbTest : test_driver const auto ctx = ExecutionContext{&handle}; const auto problem = conv::ProblemDescription{ y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights}; - const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem); - - auto workspace = std::vector(workspace_size); - auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr; + Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)}; auto filterCall = [&]() { int ret_algo_count; @@ -198,8 +190,8 @@ struct FindDbTest : test_driver 1, &ret_algo_count, perf, - workspace_dev.get(), - workspace_size, + wspace.ptr(), + wspace.size(), false); }; diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 51e3c09012..4df6209e9d 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1963,8 +1964,8 @@ struct verify_forward_infer_gru #endif auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; + size_t workspace_size = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -1979,9 +1980,8 @@ struct verify_forward_infer_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -1994,8 +1994,6 @@ struct verify_forward_infer_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2027,8 +2025,8 @@ struct verify_forward_infer_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2249,7 +2247,7 @@ struct verify_forward_train_gru auto&& handle = get_handle(); size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t workspace_size = 0; size_t reserveSpaceSize = 0; std::vector inputCPPDescs; @@ -2265,12 +2263,14 @@ struct verify_forward_train_gru hiddenSize * ((dirMode != 0) ? 
2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1); + assert(reserveSpaceSize % sizeof(T) == 0); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -2284,9 +2284,6 @@ struct verify_forward_train_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -2318,10 +2315,10 @@ struct verify_forward_train_gru ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_GRU_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -2331,10 +2328,9 @@ struct verify_forward_train_gru } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2484,7 +2480,7 @@ struct verify_backward_data_gru int bi = dirMode != 0 ? 2 : 1; int hy_h = hiddenSize; int bi_stride = bi * hy_h; - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -2494,8 +2490,8 @@ struct verify_backward_data_gru // Outputs ---------- size_t in_sz = 0; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, inputDescs.data(), &in_sz); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz / sizeof(T)); std::vector dhx(initHidden.size()); @@ -2562,8 +2558,7 @@ struct verify_backward_data_gru auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -2578,15 +2573,17 @@ struct verify_backward_data_gru hiddenSize * ((dirMode != 0) ? 
2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); + + Workspace rspace{}; + rspace.Write(reserveSpace); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -2633,15 +2630,15 @@ struct verify_backward_data_gru ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2840,8 +2837,11 @@ struct verify_backward_weights_gru hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wspace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2869,10 +2869,10 @@ struct verify_backward_weights_gru dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -3123,10 +3123,10 @@ struct gru_basic_driver : test_driver size_t reserveSpaceSize; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + 2 * out_sz + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + 2 * out_sz + (in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 
0 : hx_sz)) * sizeof(T); diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp index 9a2876b3f0..d4e49201f4 100644 --- a/test/gtest/conv_api_strided_tensors.cpp +++ b/test/gtest/conv_api_strided_tensors.cpp @@ -29,6 +29,7 @@ #include #include #include "platform.hpp" +#include "../workspace.hpp" #define MIOPEN_CHECK_RET(val) ASSERT_EQ(val, miopenStatusSuccess) @@ -66,10 +67,12 @@ class ConvStridedTensors : public ::testing::Test MIOPEN_CHECK_RET(miopenSetConvolutionGroupCount(conv_descr, 1)); // Workspace + size_t sz = 0; MIOPEN_CHECK_RET(miopenConvolutionForwardGetWorkSpaceSize( - handle, filter_descr, input_descr, conv_descr, output_descr, &workspace_size)); + handle, filter_descr, input_descr, conv_descr, output_descr, &sz)); // Data + wspace.resize(sz); h_input.resize(input_size); h_filter.resize(filter_size); h_output.resize(output_size); @@ -125,7 +128,7 @@ class ConvStridedTensors : public ::testing::Test std::vector dilation = {1, 1, 1}; // Workspace - size_t workspace_size; + Workspace wspace{}; // Data const size_t input_size = input_dims[0] * input_strides[0]; @@ -143,10 +146,9 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) { auto device = Device(handle); - auto d_workspace = device.Malloc(workspace_size); - auto d_input = device.Malloc(input_bytes); - auto d_filter = device.Malloc(filter_bytes); - auto d_output = device.Malloc(output_bytes); + auto d_input = device.Malloc(input_bytes); + auto d_filter = device.Malloc(filter_bytes); + auto d_output = device.Malloc(output_bytes); std::fill_n(h_input.begin(), h_input.size(), 1.f); ASSERT_TRUE(d_input.CopyToDevice(h_input.data(), input_bytes)); @@ -168,8 +170,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) sizeof(perf_results) / sizeof(perf_results[0]), &perf_results_count, perf_results, - d_workspace.Data(), - workspace_size, + wspace.ptr(), + wspace.size(), true), miopenStatusSuccess); ASSERT_GT(perf_results_count, 0); @@ -189,8 +191,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented) &beta, output_descr, d_output.Data(), - d_workspace.Data(), - workspace_size), + wspace.ptr(), + wspace.size()), miopenStatusSuccess); ASSERT_TRUE(device.Synchronize()); } diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp index cb55a5951b..6511800ce6 100644 --- a/test/gtest/solver_bwd.hpp +++ b/test/gtest/solver_bwd.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvBwdSolverTest @@ -76,15 +77,11 @@ struct ConvBwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); @@ -177,8 +174,7 @@ struct ConvBwdSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git 
a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp index 88fa9a9c55..20b16fcc32 100644 --- a/test/gtest/solver_fwd.hpp +++ b/test/gtest/solver_fwd.hpp @@ -38,6 +38,7 @@ #include "conv_test_base.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvFwdSolverTest @@ -77,15 +78,11 @@ struct ConvFwdSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::DataInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; + const auto invoke_params = miopen::conv::DataInvokeParams{ + tensors, wspace.ptr(), wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()}; // auto sol = solv.GetSolution(ctx, problem); // This is complicated due to the split between tunable and non-tunable solvers @@ -116,8 +113,7 @@ struct ConvFwdSolverTest } ConvTestCase conv_config; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; miopenTensorLayout_t tensor_layout; diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp index dde92e2071..dcf8311d83 100644 --- a/test/gtest/solver_wrw.hpp +++ b/test/gtest/solver_wrw.hpp @@ -40,6 +40,7 @@ #include "conv_tensor_gen.hpp" #include "get_solver.hpp" +#include "../workspace.hpp" template struct ConvWrwSolverTest @@ -76,15 +77,11 @@ struct ConvWrwSolverTest if(solv.MayNeedWorkspace()) { const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem); - workspace_dev = handle.Create(cur_sol_ws); - workspace_size = cur_sol_ws; + wspace.resize(cur_sol_ws); } - const auto invoke_params = - miopen::conv::WrWInvokeParams{tensors, - workspace_dev.get(), - workspace_size, - conv_desc.attribute.gfx90aFp16alt.GetBwd()}; + const auto invoke_params = miopen::conv::WrWInvokeParams{ + tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()}; auto sol = GetSolution(solv, ctx, problem); ASSERT_TRUE(sol.Succeeded()); @@ -179,8 +176,7 @@ struct ConvWrwSolverTest miopen::Allocator::ManageDataPtr in_dev; miopen::Allocator::ManageDataPtr wei_dev; miopen::Allocator::ManageDataPtr out_dev; - miopen::Allocator::ManageDataPtr workspace_dev; - size_t workspace_size; + Workspace wspace{}; miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect; bool test_skipped = false; }; diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index a09aab4209..d2b7d1a077 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -27,6 +27,7 @@ #ifndef GUARD_MIOPEN_TEST_LSTM_COMMON_HPP #define GUARD_MIOPEN_TEST_LSTM_COMMON_HPP +#include "workspace.hpp" #include "driver.hpp" #include "dropout_util.hpp" #include "get_handle.hpp" @@ -96,6 +97,8 @@ struct verify_backward_data_lstm std::vector initHidden; // HX std::vector initCell; // CX std::vector weights; + std::vector& RSVgpu; + std::vector& RSVcpu; std::vector batch_seq; int hiddenSize; int seqLength; @@ -115,8 +118,6 @@ struct verify_backward_data_lstm bool nodcx; bool use_dropout; bool use_seqPadding; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_backward_data_lstm(miopenRNNDescriptor_t pRD, const std::vector& py, @@ -153,6 +154,8 @@ struct verify_backward_data_lstm initHidden(phx), initCell(pcx), weights(pW), + RSVgpu(pRSVgpu), + RSVcpu(pRSVcpu), 
batch_seq(pBS), hiddenSize(pHS), seqLength(pS), @@ -171,9 +174,7 @@ struct verify_backward_data_lstm nodhx(pnodhx), nodcx(pnodcx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - RSVgpu(pRSVgpu.begin()), - RSVcpu(pRSVcpu.begin()) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -248,6 +249,8 @@ struct verify_backward_weights_lstm std::vector input; // Y std::vector dy; // dY std::vector initHidden; // HX + std::vector reserveSpace_gpu; + std::vector reserveSpace_cpu; std::vector workSpace; std::vector batch_seq; int weightSize; @@ -264,8 +267,6 @@ struct verify_backward_weights_lstm bool nohx; bool use_dropout; bool use_seqPadding; - typename std::vector reserveSpace_gpu; - typename std::vector reserveSpace_cpu; verify_backward_weights_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -291,6 +292,8 @@ struct verify_backward_weights_lstm : input(px), dy(pdy), initHidden(phx), + reserveSpace_gpu(pRSVgpu), + reserveSpace_cpu(pRSVcpu), workSpace(pWS), batch_seq(pBS), weightSize(pW), @@ -306,9 +309,7 @@ struct verify_backward_weights_lstm realHiddenSize(pHXZ), nohx(pnohx), use_dropout(puse_dropout), - use_seqPadding(puse_seqPadding), - reserveSpace_gpu(pRSVgpu), - reserveSpace_cpu(pRSVcpu) + use_seqPadding(puse_seqPadding) { if(!nohx) initHidden = phx; // this may be intentionally a nullptr @@ -533,8 +534,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto&& handle = get_handle(); - size_t out_sz = 0; - size_t workSpaceSize = 0; + size_t out_sz = 0; std::vector inputCPPDescs; std::vector inputDescs; @@ -549,9 +549,10 @@ struct verify_forward_infer_lstm : verify_forward_lstm hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{workspace_size}; auto input_dev = handle.Write(input); @@ -565,8 +566,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto cy = initCell; std::fill(cy.begin(), cy.end(), 0.); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; @@ -577,6 +576,9 @@ struct verify_forward_infer_lstm : verify_forward_lstm wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); + /// \todo: fix the handle.Write() calls below because they generate + /// temporary objects that may get destroyed before the + /// miopenRNNForwardInference call happens miopenRNNForwardInference(&handle, rnnDesc, seqLength, @@ -594,8 +596,8 @@ struct verify_forward_infer_lstm : verify_forward_lstm ((nohy) ? nullptr : handle.Write(hy).get()), &hiddenDesc, ((nocy) ? 
nullptr : handle.Write(cy).get()), - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -673,9 +675,10 @@ struct verify_forward_train_lstm : verify_forward_lstm using verify_forward_lstm::nocy; using verify_forward_lstm::use_seqPadding; + std::vector& RSVgpu; + std::vector& RSVcpu; + bool use_dropout; - typename std::vector::iterator RSVgpu; - typename std::vector::iterator RSVcpu; verify_forward_train_lstm(miopenRNNDescriptor_t pRD, const std::vector& px, @@ -700,7 +703,7 @@ struct verify_forward_train_lstm : verify_forward_lstm const bool pnocy = false, const bool puse_dropout = false, const bool puse_seqPadding = false) - : RSVgpu(pRSVgpu.begin()), RSVcpu(pRSVcpu.begin()) + : RSVgpu(pRSVgpu), RSVcpu(pRSVcpu) { input = px; initHidden = phx; @@ -861,7 +864,11 @@ struct verify_forward_train_lstm : verify_forward_lstm ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + if(reserveSpace.size() != RSVcpu.size()) + { + std::abort(); + } + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); auto retSet = std::make_tuple( output, (nohy ? initHidden : hiddenState), (nocy ? initCell : cellState)); @@ -908,17 +915,15 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(output.begin(), output.end(), static_cast(0)); auto output_dev = handle.Write(output); - size_t workSpaceSize = 0; + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + Workspace wspace{workspace_size}; + size_t reserveSpaceSize = 0; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); + Workspace rspace{reserveSpaceSize}; auto weights_dev = handle.Write(weights); @@ -957,10 +962,10 @@ struct verify_forward_train_lstm : verify_forward_lstm ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, ((nocy) ? nullptr : cy_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_LSTM_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -969,9 +974,7 @@ struct verify_forward_train_lstm : verify_forward_lstm printf("GPU outdata[%d]: %f\n", i, outdata[i]); } #endif - reserveSpace = - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + rspace.ReadTo(RSVgpu); std::vector output_gpu = handle.Read(output_dev, output.size()); @@ -1049,7 +1052,7 @@ verify_backward_data_lstm::cpu() const int hy_h = hiddenSize; int bi_stride = bi * hy_h; int out_h = hiddenSize * ((dirMode != 0) ? 
2 : 1); - size_t workSpaceSize; + size_t workspace_size; std::vector inputCPPDescs; std::vector inputDescs; @@ -1066,8 +1069,8 @@ verify_backward_data_lstm::cpu() const true, use_seqPadding); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + std::vector workSpace(workspace_size / sizeof(T)); std::vector dx(in_sz); std::vector dhx(initHidden.size()); std::vector dcx(initHidden.size()); @@ -1086,8 +1089,11 @@ verify_backward_data_lstm::cpu() const reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T); } - std::vector reserveSpace(reserveSpaceSize); - std::copy(RSVcpu, RSVcpu + reserveSpaceSize, reserveSpace.begin()); + if(reserveSpaceSize != RSVcpu.size()) + { + std::abort(); + } + std::vector reserveSpace(RSVcpu); std::vector converted_dinput; std::vector converted_output; @@ -1119,7 +1125,7 @@ verify_backward_data_lstm::cpu() const packed_doutput = &converted_doutput; // WA - wa_workSpace.resize(workSpaceSize / sizeof(T) - (packedXInSize + packedYOutSize)); + wa_workSpace.resize(workspace_size / sizeof(T) - (packedXInSize + packedYOutSize)); wa_shifted_workSpace = &wa_workSpace; } else @@ -1186,7 +1192,7 @@ verify_backward_data_lstm::cpu() const workSpace.begin() + converted_doutput.size() + converted_dinput.size()); } - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu); + std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin()); // TODO: remove workSpace auto retSet = @@ -1210,8 +1216,6 @@ verify_backward_data_lstm::gpu() const auto&& handle = get_handle(); - size_t workSpaceSize = 0; - std::vector inputCPPDescs; std::vector inputDescs; createTensorDescArray( @@ -1225,20 +1229,37 @@ verify_backward_data_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); - - size_t reserveSpaceSize; + size_t workspace_size = 0; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); + if(workspace_size % sizeof(T) != 0) + { + std::abort(); + } + Workspace wspace{}; + // Needed to zero out the workspace (happens in std::vector's constructor) + // or else this test fails verification when workspace is compared against the + // workspace returned by ::cpu method in this class + wspace.Write(std::vector(workspace_size / sizeof(T))); + // wspace.resize(workspace_size); + + size_t reserveSpaceSize = 0; miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); - std::copy(RSVgpu, RSVgpu + reserveSpace.size(), reserveSpace.begin()); + /// \todo: fix miopenGetRNNTrainingReserveSize to return a multiple of + /// sizeof(T) + // Needed because reserveSpaceSize returned is not a multiple of sizeof(T). 
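+    // Worked example of the round-up below (illustration only, not part of the
+    // fix, and assuming sizeof(T) is a power of two): with sizeof(T) == 4 and a
+    // returned reserveSpaceSize of 10 bytes, (10 + 3) & ~3 == 12, i.e. the size
+    // is rounded up to the next multiple of sizeof(T) so the buffer holds a
+    // whole number of T elements.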
+ reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - auto yin_dev = handle.Write(yin); - auto dyin_dev = handle.Write(dy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + if(reserveSpaceSize != (RSVgpu.size() * sizeof(T))) + { + std::abort(); + } + Workspace rspace{}; + rspace.Write(RSVgpu); + + auto yin_dev = handle.Write(yin); + auto dyin_dev = handle.Write(dy); + auto weights_dev = handle.Write(weights); std::vector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); @@ -1284,18 +1305,18 @@ verify_backward_data_lstm::gpu() const ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, ((nodcx) ? nullptr : dcx_dev.get()), - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); - reserveSpace = handle.Read(reserveSpace_dev, reserveSpace.size()); - std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu); + assert(RSVgpu.size() * sizeof(T) == rspace.size()); + rspace.ReadTo(RSVgpu); // TODO: remove workSpace auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), (nodcx ? initCell : handle.Read(dcx_dev, dcx.size())), - handle.Read(workSpace_dev, workSpace.size())); + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1427,8 +1448,12 @@ std::vector verify_backward_weights_lstm::gpu() const hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace_gpu); + Workspace wspace{}; + wspace.Write(workSpace); + + Workspace rspace{}; + rspace.Write(reserveSpace_gpu); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -1452,10 +1477,10 @@ std::vector verify_backward_weights_lstm::gpu() const dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace_gpu.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -1732,10 +1757,10 @@ struct lstm_basic_driver : test_driver false, usePadding); - size_t workSpaceSize; - miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + size_t workspace_size; + miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size); - size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + + size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + (2 * out_sz + in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) + (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz) + (nocx ? 0 : hx_sz) + (nocy ? 0 : hx_sz) + (nodcx ? 0 : hx_sz) + (nodcy ? 0 : hx_sz)) * @@ -1815,7 +1840,7 @@ struct lstm_basic_driver : test_driver #if(MIO_LSTM_TEST_DEBUG > 0) printf("Running backward weights LSTM.\n"); printf("reserve sz: %d, workSpace sz: %d, weight sz: %d\n", - reserveSpaceBwdData.size(), + rsvcpu.size(), workSpaceBwdData.size(), wei_sz); fflush(nullptr); diff --git a/test/main.cpp b/test/main.cpp index b48705754d..a1efb2eff8 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -23,8 +23,10 @@ * SOFTWARE. 
* *******************************************************************************/ +#include "get_handle.hpp" #include "test.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -37,17 +39,8 @@ struct handle_fixture { miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif - handle_fixture() - { - miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif - } + handle_fixture() { miopenCreate(&handle); } ~handle_fixture() { miopenDestroy(handle); } }; @@ -167,8 +160,6 @@ struct conv_forward : output_tensor_fixture { float alpha = 1, beta = 0; - // Setup OpenCL buffers - int n, h, c, w; STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w)); size_t sz_in = static_cast(n) * c * h * w; @@ -182,15 +173,12 @@ struct conv_forward : output_tensor_fixture size_t sz_fwd_workspace; STATUS(miopenConvolutionForwardGetWorkSpaceSize( handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace)); - // OCL fails to allocate zero workspace. Let's allocate small workspace instead to simplify - // subsequent code. - if(sz_fwd_workspace == 0) - sz_fwd_workspace = 256; + + Workspace wspace{sz_fwd_workspace}; std::vector in(sz_in); std::vector wei(sz_wei); std::vector out(sz_out); - std::vector fwd_workspace(sz_fwd_workspace / 4); for(size_t i = 0; i < sz_in; i++) { @@ -201,60 +189,16 @@ struct conv_forward : output_tensor_fixture wei[i] = prng::gen_A_to_B(-0.5f, 0.5f) * 0.001f; } -#if MIOPEN_BACKEND_OPENCL - - cl_context ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, nullptr); - - cl_int status = CL_SUCCESS; - cl_mem in_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_in, nullptr, &status); - cl_mem wei_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_wei, nullptr, nullptr); - cl_mem out_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 4 * sz_out, nullptr, nullptr); - cl_mem fwd_workspace_dev = - clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz_fwd_workspace, nullptr, nullptr); - - status = - clEnqueueWriteBuffer(q, in_dev, CL_TRUE, 0, 4 * sz_in, in.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, wei_dev, CL_TRUE, 0, 4 * sz_wei, wei.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer( - q, out_dev, CL_TRUE, 0, 4 * sz_out, out.data(), 0, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - fwd_workspace_dev, - CL_TRUE, - 0, - sz_fwd_workspace, - fwd_workspace.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); - -#elif MIOPEN_BACKEND_HIP - - void* in_dev; - void* wei_dev; - void* out_dev; - void* fwd_workspace_dev; - - EXPECT(hipMalloc(&in_dev, 4 * sz_in) == hipSuccess); - EXPECT(hipMalloc(&wei_dev, 4 * sz_wei) == hipSuccess); - EXPECT(hipMalloc(&out_dev, 4 * sz_out) == hipSuccess); - EXPECT(hipMalloc(&fwd_workspace_dev, sz_fwd_workspace) == hipSuccess); - - EXPECT(hipMemcpy(in_dev, in.data(), 4 * sz_in, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(wei_dev, wei.data(), 4 * sz_wei, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(out_dev, out.data(), 4 * sz_out, hipMemcpyHostToDevice) == hipSuccess); - EXPECT(hipMemcpy(fwd_workspace_dev, - fwd_workspace.data(), - sz_fwd_workspace, - hipMemcpyHostToDevice) == hipSuccess); + auto& mhand = get_handle(); + + auto in_dev = mhand.Write(in); + auto wei_dev = mhand.Write(wei); + auto out_dev = mhand.Write(out); -#endif int value = 10; - STATUS(miopenSetTensor(handle, inputTensor, in_dev, &value)); + STATUS(miopenSetTensor(handle, inputTensor, in_dev.get(), 
&value)); - STATUS(miopenScaleTensor(handle, inputTensor, in_dev, &alpha)); + STATUS(miopenScaleTensor(handle, inputTensor, in_dev.get(), &alpha)); float time; @@ -276,32 +220,32 @@ struct conv_forward : output_tensor_fixture STATUS(miopenFindConvolutionForwardAlgorithm( used_handle, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, outputTensor, - out_dev, + out_dev.get(), 1, &ret_algo_count, &perf, - fwd_workspace_dev, - sz_fwd_workspace, + wspace.ptr(), + wspace.size(), 0)); // MD: Not performing exhaustiveSearch by default for now STATUS(miopenConvolutionForward(used_handle, &alpha, inputTensor, - in_dev, + in_dev.get(), convFilter, - wei_dev, + wei_dev.get(), convDesc, perf.fwd_algo, &beta, outputTensor, - out_dev, - fwd_workspace_dev, - sz_fwd_workspace)); + out_dev.get(), + wspace.ptr(), + wspace.size())); STATUS(miopenGetKernelTime(used_handle, &time)); @@ -318,20 +262,6 @@ struct conv_forward : output_tensor_fixture { CHECK(time == 0.0); } - -// Potential memory leak free memory at end of function -#if MIOPEN_BACKEND_OPENCL - clReleaseMemObject(in_dev); - clReleaseMemObject(wei_dev); - clReleaseMemObject(out_dev); - clReleaseMemObject(fwd_workspace_dev); - -#elif MIOPEN_BACKEND_HIP - hipFree(in_dev); - hipFree(wei_dev); - hipFree(out_dev); - hipFree(fwd_workspace_dev); -#endif } }; diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index ae55b2e133..33bcb7164f 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -46,6 +46,7 @@ #include "tensor_holder.hpp" #include "verify.hpp" #include "cpu_conv.hpp" +#include "workspace.hpp" #define TEST_PADDING_MODE 0 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) @@ -199,9 +200,10 @@ struct verify_forward_pooling auto out = get_output_tensor(filter, input); indices.resize(out.data.size(), 0); - auto in_dev = handle.Write(input.data); - auto out_dev = handle.Create(out.data.size()); - auto workspace_dev = handle.Write(indices); + auto in_dev = handle.Write(input.data); + auto out_dev = handle.Create(out.data.size()); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Forward(handle, @@ -212,10 +214,10 @@ struct verify_forward_pooling out.desc, out_dev.get(), true, - workspace_dev.get(), - indices.size() * sizeof(Index)); + wspace.ptr(), + wspace.size()); - indices = handle.Read(workspace_dev, indices.size()); + indices = wspace.Read>(); out.data = handle.Read(out_dev, out.data.size()); return out; } @@ -403,9 +405,8 @@ struct verify_backward_pooling auto out_dev = handle.Write(out.data); auto din_dev = handle.Create(dinput.data.size()); - // std::vector workspace(filter.GetWorkSpaceSize(out.desc)); - // auto workspace_dev = handle.Write(workspace); - auto workspace_dev = handle.Write(indices); + Workspace wspace{}; + wspace.Write(indices); float alpha = 1, beta = 0; filter.Backward(handle, @@ -423,7 +424,7 @@ struct verify_backward_pooling // dx dinput.desc, din_dev.get(), - workspace_dev.get()); + wspace.ptr()); dinput.data = handle.Read(din_dev, dinput.data.size()); return dinput; diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 3ca771fc64..dc92a20318 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -27,6 +27,7 @@ #include "driver.hpp" #include "test.hpp" #include "verify.hpp" +#include "workspace.hpp" #include "get_handle.hpp" #include "tensor_holder.hpp" #include "random.hpp" @@ -60,28 +61,26 @@ struct verify_reduce_with_indices miopenReduceTensorIndices_t indicesOpt; miopenIndicesType_t 
indicesType; - verify_reduce_with_indices( // NOLINT (hicpp-member-init) - const miopen::ReduceTensorDescriptor& reduce_, - const tensor& input_, - const tensor& output_, - const tensor& workspace_, - const tensor& indices_, - float alpha_, - float beta_) + verify_reduce_with_indices(const miopen::ReduceTensorDescriptor& reduce_, + const tensor& input_, + const tensor& output_, + const tensor& workspace_, + const tensor& indices_, + float alpha_, + float beta_) + : reduce(reduce_), + input(input_), + output(output_), + workspace(workspace_), + indices(indices_), + alpha(alpha_), + beta(beta_), + reduceOp(reduce.reduceTensorOp_), + compTypeVal(reduce.reduceTensorCompType_), + nanOpt(reduce.reduceTensorNanOpt_), + indicesOpt(reduce.reduceTensorIndices_), + indicesType(reduce.reduceTensorIndicesType_) { - reduce = reduce_; - input = input_; - output = output_; - workspace = workspace_; - indices = indices_; - alpha = alpha_; - beta = beta_; - - reduceOp = reduce.reduceTensorOp_; - compTypeVal = reduce.reduceTensorCompType_; - nanOpt = reduce.reduceTensorNanOpt_; - indicesOpt = reduce.reduceTensorIndices_; - indicesType = reduce.reduceTensorIndicesType_; } tensor cpu() const @@ -343,10 +342,11 @@ struct verify_reduce_with_indices auto res = output; auto res_indices = indices; - auto indices_dev = handle.Write(indices.data); + Workspace idxspace{}; + idxspace.Write(indices.data); - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); - std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -358,15 +358,13 @@ struct verify_reduce_with_indices ? static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, - workspace_dev.get(), - ws_sizeInBytes, + idxspace.ptr(), + idxspace.size(), + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), @@ -377,8 +375,8 @@ struct verify_reduce_with_indices else { reduce.ReduceTensor(get_handle(), - indices_dev.get(), - indices_sizeInBytes, + idxspace.ptr(), + idxspace.size(), nullptr, 0, alphaPtr, @@ -390,7 +388,7 @@ struct verify_reduce_with_indices }; res.data = handle.Read(output_dev, res.data.size()); - res_indices.data = handle.Read(indices_dev, res_indices.data.size()); + res_indices.data = idxspace.Read(); return (std::make_tuple(res, res_indices)); } @@ -647,7 +645,8 @@ struct verify_reduce_no_indices // replicate auto res = output; - std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); + Workspace wspace{}; + wspace.Write(workspace.data); const double alpha64 = alpha; const double beta64 = beta; @@ -659,15 +658,13 @@ struct verify_reduce_no_indices ? 
static_cast(&beta64) : static_cast(&beta); - if(ws_sizeInBytes > 0) + if(wspace.size() > 0) { - auto workspace_dev = handle.Write(workspace.data); - reduce.ReduceTensor(get_handle(), nullptr, 0, - workspace_dev.get(), - ws_sizeInBytes, + wspace.ptr(), + wspace.size(), alphaPtr, input.desc, input_dev.get(), diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index ef5c1088c3..854f682068 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -35,6 +35,7 @@ #include "verify.hpp" #include "rnn_util.hpp" #include "random.hpp" +#include "workspace.hpp" #include #include #include @@ -1436,8 +1437,7 @@ struct verify_forward_infer_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - - std::vector workSpace(workSpaceSize / sizeof(T)); + Workspace wspace{workSpaceSize}; auto input_dev = handle.Write(input); @@ -1450,8 +1450,6 @@ struct verify_forward_infer_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1479,8 +1477,8 @@ struct verify_forward_infer_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize); + wspace.ptr(), + wspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1708,11 +1706,13 @@ struct verify_forward_train_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); + Workspace wspace{workSpaceSize}; + miopenGetRNNTrainingReserveSize( &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize); + reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1); - std::vector workSpace(workSpaceSize / sizeof(T)); - std::vector reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T)); + Workspace rspace{reserveSpaceSize}; auto input_dev = handle.Write(input); @@ -1726,9 +1726,6 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); - std::vector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; @@ -1756,10 +1753,10 @@ struct verify_forward_train_rnn ((nohy) ? nullptr : hy_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpaceSize); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TEST_DEBUG == 2) auto outdata = handle.Read(output_dev, output.size()); @@ -1769,10 +1766,9 @@ struct verify_forward_train_rnn } #endif - auto retSet = std::make_tuple( - handle.Read(output_dev, output.size()), - (nohy ? initHidden : handle.Read(hy_dev, hy.size())), - handle.Read(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T))); + auto retSet = std::make_tuple(handle.Read(output_dev, output.size()), + (nohy ? 
initHidden : handle.Read(hy_dev, hy.size())), + rspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2002,15 +1998,15 @@ struct verify_backward_data_rnn miopen::deref(rnnDesc).dataType); miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize); - std::vector workSpace(workSpaceSize / sizeof(T)); - auto workSpace_dev = handle.Write(workSpace); + Workspace wspace{workSpaceSize}; miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz); auto yin_dev = handle.Write(yin); auto dyin_dev = handle.Write(dy); // auto dhyin_dev = handle.Write(dhy); - auto reserveSpace_dev = handle.Write(reserveSpace); - auto weights_dev = handle.Write(weights); + Workspace rspace{}; + rspace.Write(reserveSpace); + auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); std::vector hlens(3, 0); @@ -2054,15 +2050,15 @@ struct verify_backward_data_rnn ((nodhx) ? nullptr : dhx_dev.get()), &hiddenDesc, nullptr, - workSpace_dev.get(), - workSpaceSize, - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); auto retSet = std::make_tuple(handle.Read(dx_dev, dx.size()), (nodhx ? initHidden : handle.Read(dhx_dev, dhx.size())), - handle.Read(reserveSpace_dev, reserveSpace.size()), - handle.Read(workSpace_dev, workSpace.size())); + rspace.Read>(), + wspace.Read>()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); @@ -2257,8 +2253,11 @@ struct verify_backward_weights_rnn hiddenSize * ((dirMode != 0) ? 2 : 1), miopen::deref(rnnDesc).dataType); - auto workSpace_dev = handle.Write(workSpace); - auto reserveSpace_dev = handle.Write(reserveSpace); + Workspace wspace{}; + wspace.Write(workSpace); + Workspace rspace{}; + rspace.Write(reserveSpace); + std::vector dweights(weightSize); auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); @@ -2283,10 +2282,10 @@ struct verify_backward_weights_rnn dy_dev.get(), &weightDesc, dweights_dev.get(), - workSpace_dev.get(), - workSpace.size() * sizeof(T), - reserveSpace_dev.get(), - reserveSpace.size() * sizeof(T)); + wspace.ptr(), + wspace.size(), + rspace.ptr(), + rspace.size()); #if(MIO_RNN_TIME_EVERYTHING == 1) auto t_end = std::chrono::high_resolution_clock::now(); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index dc1a38f508..bf40e7ee38 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -38,6 +38,8 @@ #include "test.hpp" #include "driver.hpp" #include "random.hpp" +#include "get_handle.hpp" +#include "workspace.hpp" template <> struct miopen_type : std::integral_constant @@ -286,19 +288,6 @@ bool verify_tensor(tensor& t_gpu, tensor& t_cpu) struct tensor_reorder_base_driver : test_driver { - miopenHandle_t handle{}; -#if MIOPEN_BACKEND_OPENCL - cl_command_queue q{}; -#endif - - tensor_reorder_base_driver() - { - miopenCreate(&handle); -#if MIOPEN_BACKEND_OPENCL - miopenGetStream(handle, &q); -#endif - } - ~tensor_reorder_base_driver() { miopenDestroy(handle); } static std::vector get_dim_3_size() { return {1, 9}; } static std::vector get_dim_2_size() { return {1, 9}; } @@ -363,14 +352,14 @@ struct tensor_reorder_driver : tensor_reorder_base_driver // NOLINTBEGIN(clang-analyzer-cplusplus.NewDeleteLeaks) void run() { - auto run_reorder = [this](uint32_t dim_0, - uint32_t dim_1, - uint32_t dim_2, - uint32_t dim_3, - uint32_t 
order_0, - uint32_t order_1, - uint32_t order_2, - uint32_t order_3) { + auto run_reorder = [](uint32_t dim_0, + uint32_t dim_1, + uint32_t dim_2, + uint32_t dim_3, + uint32_t order_0, + uint32_t order_1, + uint32_t order_2, + uint32_t order_3) { int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3; std::vector tensor_len({static_cast(dim_0), static_cast(dim_1), @@ -392,8 +381,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver tensor t_dst_gpu(tensor_len, tensor_strides); rand_tensor_integer(t_src); + auto& handle = get_handle(); miopen::ExecutionContext ctx; - ctx.SetStream(&miopen::deref(this->handle)); + ctx.SetStream(&handle); // ctx.SetupFloats(); auto reorder_sol = MakeTensorReorderAttributes(ctx, to_miopen_data_type::get(), @@ -406,36 +396,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); EXPECT(reorder_sol != nullptr); - size_t workspace = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz - : reorder_sol->GetOutputTensorSize(); -#if MIOPEN_BACKEND_OPENCL - cl_context cl_ctx; - clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &cl_ctx, nullptr); - cl_int status = CL_SUCCESS; - cl_mem src_dev = - clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, sizeof(T) * tensor_sz, nullptr, &status); - cl_mem dst_dev = clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, workspace, nullptr, nullptr); - status |= clEnqueueWriteBuffer(q, - src_dev, - CL_TRUE, - 0, - sizeof(T) * tensor_sz, - t_src.data.data(), - 0, - nullptr, - nullptr); - EXPECT(status == CL_SUCCESS); -#elif MIOPEN_BACKEND_HIP - void* src_dev; - void* dst_dev; - EXPECT(hipMalloc(&src_dev, sizeof(T) * tensor_sz) == hipSuccess); - EXPECT(hipMalloc(&dst_dev, workspace) == hipSuccess); - EXPECT(hipMemcpy( - src_dev, t_src.data.data(), sizeof(T) * tensor_sz, hipMemcpyHostToDevice) == - hipSuccess); -#endif - const auto invoke_param = reorder_invoke_param{ - DataCast(static_cast(src_dev)), DataCast(dst_dev)}; + size_t workspace_size = reorder_sol->IsSkippable() ? 
sizeof(T) * tensor_sz + : reorder_sol->GetOutputTensorSize(); + Workspace wspace{workspace_size}; + + auto src_dev = handle.Write(t_src.data); + + const auto invoke_param = reorder_invoke_param{src_dev.get(), wspace.ptr()}; std::vector opArgs = reorder_sol->GetKernelArg(); boost::optional invoker_factory( [=](const std::vector& kernels) mutable { @@ -451,10 +418,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver }); std::vector construction_params{ reorder_sol->GetKernelInfo()}; - const auto invoker = - miopen::deref(this->handle).PrepareInvoker(*invoker_factory, construction_params); + const auto invoker = handle.PrepareInvoker(*invoker_factory, construction_params); // run gpu - invoker(miopen::deref(this->handle), invoke_param); + invoker(handle, invoke_param); // run cpu cpu_reorder::run(t_dst.data.data(), t_src.data.data(), @@ -467,18 +433,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver order_2, order_3); invoker_factory = boost::none; -#if MIOPEN_BACKEND_OPENCL - status = clEnqueueReadBuffer( - q, dst_dev, CL_TRUE, 0, workspace, t_dst_gpu.data.data(), 0, nullptr, nullptr); - EXPECT(status == CL_SUCCESS); - clReleaseMemObject(dst_dev); - clReleaseMemObject(src_dev); -#elif MIOPEN_BACKEND_HIP - EXPECT(hipMemcpy(t_dst_gpu.data.data(), dst_dev, workspace, hipMemcpyDeviceToHost) == - hipSuccess); - hipFree(dst_dev); - hipFree(src_dev); -#endif + + t_dst_gpu.data = wspace.Read(); + // we expect excact match, since use integer bool valid_result = verify_tensor(t_dst_gpu, t_dst); std::cout << "[" << reorder_str::get(order_0, order_1, order_2, order_3) << ", b" diff --git a/test/workspace.hpp b/test/workspace.hpp new file mode 100644 index 0000000000..93522b1cd4 --- /dev/null +++ b/test/workspace.hpp @@ -0,0 +1,163 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include + +#include "get_handle.hpp" + +#define HIP_CHECK(exp) \ + if((exp) != hipSuccess) \ + { \ + MIOPEN_LOG_E(#exp "failed at line: " << __LINE__ << " in file: " << __FILE__); \ + } + +class Workspace +{ + + // RAII class for hip allocations + class GPUBuffer + { + public: + GPUBuffer() = default; + + explicit GPUBuffer(size_t num_bytes) : sz_(num_bytes) + { + if(num_bytes > 0) + { + HIP_CHECK(hipMalloc(&buf_, num_bytes)); + assert(buf_ != nullptr); + } + else + { + buf_ = nullptr; + } + } + + ~GPUBuffer() { FreeBuf(); } + + void* ptr() { return buf_; } + void* ptr() const { return buf_; } + + auto size() const { return sz_; } + + GPUBuffer(const GPUBuffer&) = delete; + GPUBuffer& operator=(const GPUBuffer&) = delete; + + GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_), sz_(that.sz_) + { + that.buf_ = nullptr; // take over ownership + that.sz_ = 0; + } + + GPUBuffer& operator=(GPUBuffer&& that) noexcept + { + FreeBuf(); + std::swap(this->buf_, that.buf_); + std::swap(this->sz_, that.sz_); + return *this; + } + + private: + void FreeBuf() + { + HIP_CHECK(hipFree(buf_)); + buf_ = nullptr; + sz_ = 0; + } + + void* buf_ = nullptr; + size_t sz_ = 0; + }; + + // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is + // passed to capture the size. Must call AdjustToSize() after calling such a method + size_t* SizePtr() { return &sz_; } + + void AdjustToSize() + { + if(sz_ != gpu_buf_.size()) + { + gpu_buf_ = GPUBuffer(sz_); + } + } + +public: + explicit Workspace(size_t sz = 0) : sz_(sz) { AdjustToSize(); } + + Workspace(const Workspace&) = delete; + Workspace& operator=(const Workspace&) = delete; + Workspace(Workspace&&) = default; + Workspace& operator=(Workspace&&) = default; + + size_t size() const { return sz_; } + + void resize(size_t sz_in_bytes) + { + sz_ = sz_in_bytes; + AdjustToSize(); + } + + auto ptr() const { return gpu_buf_.ptr(); } + + auto ptr() { return gpu_buf_.ptr(); } + + template + void Write(const V& vec) + { + using T = typename V::value_type; + resize(vec.size() * sizeof(T)); + HIP_CHECK(hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice)); + } + + template + void ReadTo(V& vec) const + { + using T = typename V::value_type; + if(vec.size() * sizeof(T) != size()) + { + MIOPEN_LOG_E("vector of wrong size passed"); + std::abort(); + } + HIP_CHECK(hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost)); + } + + template + V Read() const + { + using T = typename V::value_type; + size_t num_elem = size() / sizeof(T); + V ret(num_elem); + ReadTo(ret); + return ret; + } + +private: + // miopen::Handle& handle_; + // miopen::Allocator::ManageDataPtr data_{}; + GPUBuffer gpu_buf_{}; + size_t sz_{}; +}; From c5a2384dc0f29682ed51aeccf9b981dbdf7e058f Mon Sep 17 00:00:00 2001 From: Chris Erb Date: Sat, 16 Dec 2023 14:25:15 -0600 Subject: [PATCH 4/4] [gtest] conversion for code coverage tests (#2580) --- test/CMakeLists.txt | 22 --- test/{immed_conv3d.cpp => conv3d.hpp} | 12 +- test/driver.hpp | 7 +- test/gtest/CMakeLists.txt | 2 +- test/gtest/conv3d_codecov.cpp | 172 +++++++++++++++++++++ test/gtest/conv_2d.hpp | 66 --------- test/gtest/conv_igemm_mlir_xdlops.cpp | 2 +- test/gtest/immed_conv2d_codecov.cpp | 206 ++++++++++++++++++++++++++ test/gtest/immed_conv3d_codecov.cpp | 206 ++++++++++++++++++++++++++ test/gtest/pooling2d_asymmetric.cpp | 136 +++++++++++++++++ test/gtest/pooling2d_codecov.cpp | 136 +++++++++++++++++ 
test/gtest/pooling2d_wide.cpp | 136 +++++++++++++++++ test/immed_conv2d.cpp | 62 -------- test/{pooling2d.cpp => pooling2d.hpp} | 2 - 14 files changed, 1005 insertions(+), 162 deletions(-) rename test/{immed_conv3d.cpp => conv3d.hpp} (91%) create mode 100644 test/gtest/conv3d_codecov.cpp delete mode 100644 test/gtest/conv_2d.hpp create mode 100644 test/gtest/immed_conv2d_codecov.cpp create mode 100644 test/gtest/immed_conv3d_codecov.cpp create mode 100644 test/gtest/pooling2d_asymmetric.cpp create mode 100644 test/gtest/pooling2d_codecov.cpp create mode 100644 test/gtest/pooling2d_wide.cpp delete mode 100644 test/immed_conv2d.cpp rename test/{pooling2d.cpp => pooling2d.hpp} (98%) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d35ed0c43f..ffaeb93228 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -673,20 +673,6 @@ function(add_custom_test NAME) set_tests_properties(${NAME} PROPERTIES DISABLED On) endif() endfunction() -if(${CODECOV_TEST}) - add_custom_test(test_conv3d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 2 4 4 4 4 --weights 2 4 1 1 1 --pads_strides_dilations 0 0 0 1 1 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_immed_conv2d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 2 2 14 14 --weights 8 2 3 3 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_immed_conv3d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 1 4 4 4 4 --weights 2 4 3 3 3 --pads_strides_dilations 0 0 0 1 1 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} - ) - add_custom_test(test_pooling2d_codecov - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --input 1, 192, 28, 28 --lens 2 2 --strides 2 2 --pads 0 0 ${MIOPEN_TEST_FLAGS_ARGS} - ) -endif() if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_issue_1576 SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED FLOAT_DISABLED HALF_ENABLED @@ -776,14 +762,6 @@ set(TEST_CONV_VERBOSE_F ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-backward-da set(TEST_CONV_VERBOSE_B ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-forward --disable-backward-weights) set(TEST_CONV_VERBOSE_W ${MIOPEN_TEST_FLOAT_ARG} --verbose --disable-forward --disable-backward-data) -add_custom_test(test_pooling2d_asymmetric SKIP_UNLESS_ALL HALF_ENABLED GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --all --dataset 1 --limit 0 ${MIOPEN_TEST_FLAGS_ARGS} -) - -add_custom_test(test_pooling2d_wide SKIP_UNLESS_ALL HALF_ENABLED GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED - COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --all --dataset 2 --limit 0 ${MIOPEN_TEST_FLAGS_ARGS} -) - set(IMPLICITGEMM_MLIR_ENV_F_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmFwdXdlops) set(IMPLICITGEMM_MLIR_ENV_B_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmBwdXdlops) set(IMPLICITGEMM_MLIR_ENV_W_XDLOPS ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvMlirIgemmWrWXdlops) diff --git a/test/immed_conv3d.cpp b/test/conv3d.hpp similarity index 91% rename from test/immed_conv3d.cpp rename to test/conv3d.hpp index a968cd6100..2b49c0f10e 100644 --- a/test/immed_conv3d.cpp +++ b/test/conv3d.hpp @@ -26,9 +26,9 @@ #include "conv_common.hpp" template -struct conv3d_driver : conv_driver +struct conv3d_driver : conv_driver { - conv3d_driver() : conv_driver() + conv3d_driver() : conv_driver() { this->add(this->input_dims, "input"); this->add(this->weight_tensor_dims, "weights"); @@ -37,16 +37,16 @@ struct conv3d_driver : conv_driver 
this->generate_data_limited(this->get_batch_sizes(), 1, {8})); this->add(this->input_channels, "input_channels", - this->generate_data_limited(this->get_input_channels(), 1, {2})); + this->generate_data_limited(this->get_input_channels(), 1, {32})); this->add(this->output_channels, "output_channels", - this->generate_data_limited(this->get_output_channels(), 1, {16})); + this->generate_data_limited(this->get_output_channels(), 1, {32})); this->add(this->spatial_dim_elements, "spatial_dim_elements", this->generate_data_limited(this->get_3d_spatial_dims(), 1, {16, 16, 16})); this->add(this->filter_dims, "filter_dims", - this->generate_data_limited(this->get_3d_filter_dims(), 2, {5, 5, 5})); + this->generate_data_limited(this->get_3d_filter_dims(), 2, {3, 3, 3})); this->add(this->pads_strides_dilations, "pads_strides_dilations", this->generate_data_limited(this->get_3d_pads_strides_dilations(), 2)); @@ -58,5 +58,3 @@ struct conv3d_driver : conv_driver this->add(this->out_layout, "out_layout", this->generate_data({"NCDHW"})); } }; - -int main(int argc, const char* argv[]) { test_drive(argc, argv); } diff --git a/test/driver.hpp b/test/driver.hpp index 825a0d4511..97896d6c5d 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -1242,6 +1242,11 @@ void test_drive_impl_1(std::string program_name, std::vector as) Driver d{}; d.program_name = program_name; + std::cout << program_name << " "; + for(const auto& str : as) + std::cout << str << " "; + std::cout << std::endl; + std::set keywords{ "--help", "-h", "--half", "--float", "--double", "--int8", "--bfloat16"}; d.parse(keyword_set{keywords}); @@ -1381,7 +1386,7 @@ template