diff --git a/include/clad/Differentiator/BuiltinDerivatives.h b/include/clad/Differentiator/BuiltinDerivatives.h index a31b239bb..557274a56 100644 --- a/include/clad/Differentiator/BuiltinDerivatives.h +++ b/include/clad/Differentiator/BuiltinDerivatives.h @@ -30,6 +30,11 @@ template struct ValueAndPushforward { } }; +template +ValueAndPushforward make_value_and_pushforward(T value, U pushforward) { + return {value, pushforward}; +} + template struct ValueAndAdjoint { T value; U adjoint; @@ -178,6 +183,25 @@ CUDA_HOST_DEVICE ValueAndPushforward floor_pushforward(T x, T /*d_x*/) { return {::std::floor(x), (T)0}; } +template +CUDA_HOST_DEVICE ValueAndPushforward atan2_pushforward(T y, T x, T d_y, + T d_x) { + return {::std::atan2(y, x), + -(y / ((x * x) + (y * y))) * d_x + x / ((x * x) + (y * y)) * d_y}; +} + +template +CUDA_HOST_DEVICE void atan2_pullback(T y, T x, U d_z, T* d_y, T* d_x) { + *d_y += x / ((x * x) + (y * y)) * d_z; + + *d_x += -(y / ((x * x) + (y * y))) * d_z; +} + +template +CUDA_HOST_DEVICE ValueAndPushforward acos_pushforward(T x, T d_x) { + return {::std::acos(x), ((-1) / (::std::sqrt(1 - x * x))) * d_x}; +} + template CUDA_HOST_DEVICE ValueAndPushforward ceil_pushforward(T x, T /*d_x*/) { return {::std::ceil(x), (T)0}; @@ -316,6 +340,9 @@ inline void free_pushforward(void* ptr, void* d_ptr) { // These are required because C variants of mathematical functions are // defined in global namespace. using std::abs_pushforward; +using std::acos_pushforward; +using std::atan2_pullback; +using std::atan2_pushforward; using std::ceil_pushforward; using std::cos_pushforward; using std::exp_pushforward; diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h index 3a8f35faf..cca1cd5cf 100644 --- a/include/clad/Differentiator/Differentiator.h +++ b/include/clad/Differentiator/Differentiator.h @@ -125,8 +125,18 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { CUDA_ARGS CUDA_REST_ARGS Args&&... args) { #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) if (CUDAkernel) { - void* argPtrs[] = {(void*)&args..., (void*)static_cast(nullptr)...}; - cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream); + constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest); + std::vector argPtrs; + argPtrs.reserve(totalArgs); + (argPtrs.push_back(static_cast(&args)), ...); + + void* null_param = nullptr; + for (size_t i = sizeof...(args); i < totalArgs; ++i) + argPtrs[i] = &null_param; + + cudaLaunchKernel((void*)f, grid, block, argPtrs.data(), shared_mem, + stream); + return return_type_t(); } else { return f(static_cast(args)..., static_cast(nullptr)...); } @@ -198,23 +208,17 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, FunctorType* functor = nullptr, bool CUDAkernel = false) - : m_Functor(functor), m_CUDAkernel(CUDAkernel) { - assert(f && "Must pass a non-0 argument."); - if (size_t length = GetLength(code)) { - m_Function = f; - char* temp = (char*)malloc(length + 1); - m_Code = temp; - while ((*temp++ = *code++)); - } else { - // clad did not place the derivative in this object. This can happen - // upon error of if clad was disabled. Diagnose. - printf("clad failed to place the generated derivative in the object\n"); - printf("Make sure calls to clad are within a #pragma clad ON region\n"); - - // Invalidate the placeholders. - m_Function = nullptr; - m_Code = nullptr; - } + : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { +#ifndef __CLAD_SO_LOADED + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you pass clad.so to clang."); +#endif + + size_t length = GetLength(code); + char* temp = (char*)malloc(length + 1); + m_Code = temp; + while ((*temp++ = *code++)) + ; } /// Constructor overload for initializing `m_Functor` when functor /// is passed by reference. @@ -371,9 +375,6 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template return_type_t execute_helper(ReturnType C::*f, Args&&... args) { - assert(m_Functor && - "No default object set, explicitly pass an object to " - "CladFunction::execute"); // `static_cast` is required here for perfect forwarding. return execute_with_default_args( DropArgs_t{}, f, *m_Functor, @@ -411,9 +412,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { differentiate(F fn, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - assert(fn && "Must pass in a non-0 argument"); - return CladFunction>(derivedFn, - code); + return CladFunction>(derivedFn, + code); } /// Specialization for differentiating functors. @@ -454,9 +454,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { differentiate(F fn, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - assert(fn && "Must pass in a non-0 argument"); - return CladFunction, true>( - derivedFn, code); + return CladFunction, true>( + derivedFn, code); } /// Generates function which computes gradient of the given function wrt the @@ -475,7 +474,6 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { gradient(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "", bool CUDAkernel = false) { - assert(f && "Must pass in a non-0 argument"); return CladFunction, true>( derivedFn /* will be replaced by gradient*/, code, nullptr, CUDAkernel); } @@ -512,9 +510,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { hessian(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - assert(f && "Must pass in a non-0 argument"); - return CladFunction>( - derivedFn /* will be replaced by hessian*/, code); + return CladFunction>( + derivedFn /* will be replaced by hessian*/, code); } /// Specialization for differentiating functors. @@ -549,9 +546,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { jacobian(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - assert(f && "Must pass in a non-0 argument"); - return CladFunction>( - derivedFn /* will be replaced by Jacobian*/, code); + return CladFunction>( + derivedFn /* will be replaced by Jacobian*/, code); } /// Specialization for differentiating functors. @@ -576,7 +572,6 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { estimate_error(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - assert(f && "Must pass in a non-0 argument"); return CladFunction< DerivedFnType>(derivedFn /* will be replaced by estimation code*/, code); diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h index 40feb6723..ad9981bb1 100644 --- a/include/clad/Differentiator/ReverseModeVisitor.h +++ b/include/clad/Differentiator/ReverseModeVisitor.h @@ -396,6 +396,7 @@ namespace clad { StmtDiff VisitImplicitValueInitExpr(const clang::ImplicitValueInitExpr* IVIE); StmtDiff VisitCStyleCastExpr(const clang::CStyleCastExpr* CSCE); + StmtDiff VisitPseudoObjectExpr(const clang::PseudoObjectExpr* POE); StmtDiff VisitInitListExpr(const clang::InitListExpr* ILE); StmtDiff VisitIntegerLiteral(const clang::IntegerLiteral* IL); StmtDiff VisitMemberExpr(const clang::MemberExpr* ME); diff --git a/include/clad/Differentiator/STLBuiltins.h b/include/clad/Differentiator/STLBuiltins.h index 9f3cb42c0..40b562dc4 100644 --- a/include/clad/Differentiator/STLBuiltins.h +++ b/include/clad/Differentiator/STLBuiltins.h @@ -1,14 +1,19 @@ #ifndef CLAD_STL_BUILTINS_H #define CLAD_STL_BUILTINS_H +#include #include +#include #include +#include #include namespace clad { namespace custom_derivatives { namespace class_functions { +// vector forward mode + template void clear_pushforward(::std::vector* v, ::std::vector* d_v) { d_v->clear(); @@ -128,6 +133,181 @@ operator_subscript_pushforward(const ::std::vector* v, unsigned idx, return {(*v)[idx], (*d_v)[idx]}; } +template +ValueAndPushforward at_pushforward(::std::vector* v, unsigned idx, + ::std::vector* d_v, + unsigned d_idx) { + return {(*v)[idx], (*d_v)[idx]}; +} + +template +ValueAndPushforward +at_pushforward(const ::std::vector* v, unsigned idx, + const ::std::vector* d_v, unsigned d_idx) { + return {(*v)[idx], (*d_v)[idx]}; +} + +template +clad::ValueAndPushforward<::std::vector&, ::std::vector&> +operator_equal_pushforward(::std::vector* a, const ::std::vector& param, + ::std::vector* d_a, + const ::std::vector& d_param) noexcept { + (*a) = param; + (*d_a) = d_param; + return {*a, *d_a}; +} + +template +inline clad::ValueAndPushforward +front_pushforward(const ::std::vector* a, + const ::std::vector* d_a) noexcept { + return {a->front(), d_a->front()}; +} + +template +inline clad::ValueAndPushforward +front_pushforward(::std::vector* a, ::std::vector* d_a) noexcept { + return {a->front(), d_a->front()}; +} + +template +inline clad::ValueAndPushforward +back_pushforward(const ::std::vector* a, + const ::std::vector* d_a) noexcept { + return {a->back(), d_a->back()}; +} + +template +inline clad::ValueAndPushforward +back_pushforward(::std::vector* a, ::std::vector* d_a) noexcept { + return {a->back(), d_a->back()}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +begin_pushforward(::std::vector* v, ::std::vector* d_v) { + return {v->begin(), d_v->begin()}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +end_pushforward(::std::vector* v, ::std::vector* d_v) { + return {v->end(), d_v->end()}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +erase_pushforward(::std::vector* v, + typename ::std::vector::const_iterator pos, + ::std::vector* d_v, + typename ::std::vector::const_iterator d_pos) { + return {v->erase(pos), d_v->erase(d_pos)}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +insert_pushforward(::std::vector* v, + typename ::std::vector::const_iterator pos, U u, + ::std::vector* d_v, + typename ::std::vector::const_iterator d_pos, U d_u) { + return {v->insert(pos, u), d_v->insert(d_pos, d_u)}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +insert_pushforward(::std::vector* v, + typename ::std::vector::const_iterator pos, + ::std::initializer_list list, ::std::vector* d_v, + typename ::std::vector::const_iterator d_pos, + ::std::initializer_list d_list) { + return {v->insert(pos, list), d_v->insert(d_pos, d_list)}; +} + +template +ValueAndPushforward::iterator, + typename ::std::vector::iterator> +insert_pushforward(::std::vector* v, + typename ::std::vector::const_iterator pos, U first, + U last, ::std::vector* d_v, + typename ::std::vector::const_iterator d_pos, U d_first, + U d_last) { + return {v->insert(pos, first, last), d_v->insert(d_pos, d_first, d_last)}; +} + +template +void assign_pushforward(::std::vector* v, + typename ::std::vector::size_type n, const U& val, + ::std::vector* d_v, + typename ::std::vector::size_type /*d_n*/, + const U& d_val) { + v->assign(n, val); + d_v->assign(n, d_val); +} + +template +void assign_pushforward(::std::vector* v, U first, U last, + ::std::vector* d_v, U d_first, U d_last) { + v->assign(first, last); + d_v->assign(d_first, d_last); +} + +template +void assign_pushforward(::std::vector* v, ::std::initializer_list list, + ::std::vector* d_v, + ::std::initializer_list d_list) { + v->assign(list); + d_v->assign(d_list); +} + +template +void reserve_pushforward(::std::vector* v, + typename ::std::vector::size_type n, + ::std::vector* d_v, + typename ::std::vector::size_type /*d_n*/) { + v->reserve(n); + d_v->reserve(n); +} + +template +void shrink_to_fit_pushforward(::std::vector* v, ::std::vector* d_v) { + v->shrink_to_fit(); + d_v->shrink_to_fit(); +} + +template +void push_back_pushforward(::std::vector* v, U val, ::std::vector* d_v, + U d_val) { + v->push_back(val); + d_v->push_back(d_val); +} + +template +void pop_back_pushforward(::std::vector* v, ::std::vector* d_v) noexcept { + v->pop_back(); + d_v->pop_back(); +} + +template +clad::ValueAndPushforward<::std::size_t, ::std::size_t> +size_pushforward(const ::std::vector* v, + const ::std::vector* d_v) noexcept { + return {v->size(), 0}; +} + +template +clad::ValueAndPushforward<::std::size_t, ::std::size_t> +capacity_pushforward(const ::std::vector* v, + const ::std::vector* d_v) noexcept { + return {v->capacity(), 0}; +} + +// array forward mode + template constexpr clad::ValueAndPushforward operator_subscript_pushforward(::std::array* a, ::std::size_t i, @@ -195,20 +375,44 @@ back_pushforward(::std::array* a, ::std::array* d_a) noexcept { return {a->back(), d_a->back()}; } -template -void fill_pushforward(::std::array* a, const T& u, - ::std::array* d_a, const T& d_u) { +template +void fill_pushforward(::std::array* a, const U& u, + ::std::array* d_a, const U& d_u) { a->fill(u); d_a->fill(d_u); } +template +clad::ValueAndPushforward<::std::size_t, ::std::size_t> +size_pushforward(const ::std::array* a, + const ::std::array* d_a) noexcept { + return {a->size(), 0}; +} + +// vector reverse mode +// more can be found in tests: test/Gradient/STLCustomDerivatives.C + +template +void push_back_reverse_forw(::std::vector* v, U val, ::std::vector* d_v, + pU /*d_val*/) { + v->push_back(val); + d_v->push_back(0); +} + template void push_back_reverse_forw(::std::vector* v, U val, ::std::vector* d_v, - U d_val) { + U /*d_val*/) { v->push_back(val); d_v->push_back(0); } +template +void push_back_pullback(::std::vector* v, U val, ::std::vector* d_v, + pU* d_val) { + *d_val += d_v->back(); + d_v->pop_back(); +} + template void push_back_pullback(::std::vector* v, U val, ::std::vector* d_v, U* d_val) { @@ -231,6 +435,22 @@ void operator_subscript_pullback(::std::vector* vec, (*d_vec)[idx] += d_y; } +template +clad::ValueAndAdjoint +at_reverse_forw(::std::vector* vec, typename ::std::vector::size_type idx, + ::std::vector* d_vec, + typename ::std::vector::size_type d_idx) { + return {(*vec)[idx], (*d_vec)[idx]}; +} + +template +void at_pullback(::std::vector* vec, + typename ::std::vector::size_type idx, P d_y, + ::std::vector* d_vec, + typename ::std::vector::size_type* d_idx) { + (*d_vec)[idx] += d_y; +} + template ::clad::ValueAndAdjoint<::std::vector, ::std::vector> constructor_reverse_forw(::clad::ConstructorReverseForwTag<::std::vector>, @@ -253,6 +473,45 @@ void constructor_pullback(::std::vector* v, S count, U val, d_v->clear(); } +template +void assign_pullback(::std::vector* v, + typename ::std::vector::size_type n, U /*val*/, + ::std::vector* d_v, + typename ::std::vector::size_type* /*d_n*/, dU* d_val) { + for (typename ::std::vector::size_type i = 0; i < n; ++i) { + (*d_val) += (*d_v)[i]; + (*d_v)[i] = 0; + } +} + +template +void reserve_pullback(::std::vector* v, + typename ::std::vector::size_type n, + ::std::vector* d_v, + typename ::std::vector::size_type* /*d_n*/) noexcept {} + +template +void shrink_to_fit_pullback(::std::vector* /*v*/, + ::std::vector* /*d_v*/) noexcept {} + +template +void size_pullback(::std::vector* /*v*/, + ::std::vector* /*d_v*/) noexcept {} + +template +void capacity_pullback(::std::vector* /*v*/, + ::std::vector* /*d_v*/) noexcept {} + +template +void size_pullback(::std::vector* /*v*/, U /*d_y*/, + ::std::vector* /*d_v*/) noexcept {} + +template +void capacity_pullback(::std::vector* /*v*/, U /*d_y*/, + ::std::vector* /*d_v*/) noexcept {} + +// array reverse mode + template clad::ValueAndAdjoint operator_subscript_reverse_forw( ::std::array* arr, typename ::std::array::size_type idx, @@ -322,6 +581,9 @@ void front_pullback(::std::array* arr, } template void size_pullback(::std::array* a, ::std::array* d_a) noexcept {} +template +void size_pullback(::std::array* /*a*/, U /*d_y*/, + ::std::array* /*d_a*/) noexcept {} template ::clad::ValueAndAdjoint<::std::array, ::std::array> constructor_reverse_forw(::clad::ConstructorReverseForwTag<::std::array>, @@ -338,7 +600,89 @@ void constructor_pullback(::std::array* a, const ::std::array& arr, (*d_arr)[i] += (*d_a)[i]; } +// tuple forward mode + +template +clad::ValueAndPushforward<::std::tuple, ::std::tuple> +operator_equal_pushforward(::std::tuple* tu, + ::std::tuple&& in, + ::std::tuple* d_tu, + ::std::tuple&& d_in) noexcept { + ::std::tuple t1 = (*tu = in); + ::std::tuple t2 = (*d_tu = d_in); + return {t1, t2}; +} + } // namespace class_functions + +namespace std { + +// tie and maketuple forward mode + +// Helper functions for selecting subtuples +template <::std::size_t shift_amount, ::std::size_t... Is> +constexpr auto shift_sequence(IndexSequence) { + return IndexSequence{}; +} + +template +auto select_tuple_elements(const Tuple& tpl, IndexSequence) { + return ::std::make_tuple(::std::get(tpl)...); +} + +template auto first_half_tuple(const Tuple& tpl) { + // static_assert(::std::tuple_size::value % 2 == 0); + constexpr ::std::size_t half = ::std::tuple_size::value / 2; + + constexpr MakeIndexSequence first_half; + return select_tuple_elements(tpl, first_half); +} + +template auto second_half_tuple(const Tuple& tpl) { + // static_assert(::std::tuple_size::value % 2 == 0); + constexpr ::std::size_t half = ::std::tuple_size::value / 2; + + constexpr MakeIndexSequence first_half; + constexpr auto second_half = shift_sequence(first_half); + return select_tuple_elements(tpl, second_half); +} + +template +auto select_tuple_elements_tie(const Tuple& tpl, IndexSequence) { + return ::std::tie(::std::get(tpl)...); +} + +template auto first_half_tuple_tie(const Tuple& tpl) { + // static_assert(::std::tuple_size::value % 2 == 0); + constexpr ::std::size_t half = ::std::tuple_size::value / 2; + + constexpr MakeIndexSequence first_half; + return select_tuple_elements_tie(tpl, first_half); +} + +template auto second_half_tuple_tie(const Tuple& tpl) { + // static_assert(::std::tuple_size::value % 2 == 0); + constexpr ::std::size_t half = ::std::tuple_size::value / 2; + + constexpr MakeIndexSequence first_half; + constexpr auto second_half = shift_sequence(first_half); + return select_tuple_elements_tie(tpl, second_half); +} + +template auto tie_pushforward(Args&&... args) noexcept { + ::std::tuple t = ::std::tie(args...); + return clad::make_value_and_pushforward(first_half_tuple_tie(t), + second_half_tuple_tie(t)); +} + +template auto make_tuple_pushforward(Args... args) noexcept { + ::std::tuple t = ::std::make_tuple(args...); + return clad::make_value_and_pushforward(first_half_tuple(t), + second_half_tuple(t)); +} + +} // namespace std + } // namespace custom_derivatives } // namespace clad diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp index cf3b23ecd..b5df001d5 100644 --- a/lib/Differentiator/ReverseModeVisitor.cpp +++ b/lib/Differentiator/ReverseModeVisitor.cpp @@ -3282,6 +3282,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context, return {castExpr, castExprDiff}; } + StmtDiff + ReverseModeVisitor::VisitPseudoObjectExpr(const PseudoObjectExpr* POE) { + // Used for CUDA Builtins + return {Clone(POE), Clone(POE)}; + } + StmtDiff ReverseModeVisitor::VisitMemberExpr(const MemberExpr* ME) { auto baseDiff = VisitWithExplicitNoDfDx(ME->getBase()); auto* field = ME->getMemberDecl(); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 715af0971..a1d17f51d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -95,7 +95,7 @@ add_lit_testsuite(check-clad "Running the Clad regression tests" LIT ${LIT_COMMAND} PARAMS ${CLAD_TEST_PARAMS} DEPENDS ${CLAD_TEST_DEPS} - ARGS ${CLAD_TEST_EXTRA_ARGS} --show-skipped --show-unsupported + ARGS ${CLAD_TEST_EXTRA_ARGS} --verbose --show-skipped --show-unsupported ) set_target_properties(check-clad PROPERTIES FOLDER "Clad tests") diff --git a/test/CUDA/GradientKernels.cu b/test/CUDA/GradientKernels.cu index e67c74926..01da8a299 100644 --- a/test/CUDA/GradientKernels.cu +++ b/test/CUDA/GradientKernels.cu @@ -33,46 +33,314 @@ void fake_kernel(int *a) { *a *= *a; } +__global__ void add_kernel(int *out, int *in) { + int index = threadIdx.x; + out[index] += in[index]; +} + +// CHECK: void add_kernel_grad(int *out, int *in, int *_d_out, int *_d_in) { +//CHECK-NEXT: int _d_index = 0; +//CHECK-NEXT: int index0 = threadIdx.x; +//CHECK-NEXT: int _t0 = out[index0]; +//CHECK-NEXT: out[index0] += in[index0]; +//CHECK-NEXT: { +//CHECK-NEXT: out[index0] = _t0; +//CHECK-NEXT: int _r_d0 = _d_out[index0]; +//CHECK-NEXT: _d_in[index0] += _r_d0; +//CHECK-NEXT: } +//CHECK-NEXT: } + +__global__ void add_kernel_2(int *out, int *in) { + out[threadIdx.x] += in[threadIdx.x]; +} + +// CHECK: void add_kernel_2_grad(int *out, int *in, int *_d_out, int *_d_in) { +//CHECK-NEXT: int _t0 = out[threadIdx.x]; +//CHECK-NEXT: out[threadIdx.x] += in[threadIdx.x]; +//CHECK-NEXT: { +//CHECK-NEXT: out[threadIdx.x] = _t0; +//CHECK-NEXT: int _r_d0 = _d_out[threadIdx.x]; +//CHECK-NEXT: _d_in[threadIdx.x] += _r_d0; +//CHECK-NEXT: } +//CHECK-NEXT: } + +__global__ void add_kernel_3(int *out, int *in) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + out[index] += in[index]; +} + +// CHECK: void add_kernel_3_grad(int *out, int *in, int *_d_out, int *_d_in) { +//CHECK-NEXT: unsigned int _t1 = blockIdx.x; +//CHECK-NEXT: unsigned int _t0 = blockDim.x; +//CHECK-NEXT: int _d_index = 0; +//CHECK-NEXT: int index0 = threadIdx.x + _t1 * _t0; +//CHECK-NEXT: int _t2 = out[index0]; +//CHECK-NEXT: out[index0] += in[index0]; +//CHECK-NEXT: { +//CHECK-NEXT: out[index0] = _t2; +//CHECK-NEXT: int _r_d0 = _d_out[index0]; +//CHECK-NEXT: _d_in[index0] += _r_d0; +//CHECK-NEXT: } +//CHECK-NEXT:} + +__global__ void add_kernel_4(int *out, int *in) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < 5) { + int sum = 0; + // Each thread sums elements in steps of warpSize + for (int i = index; i < 5; i += warpSize) { + sum += in[i]; + } + out[index] = sum; + } +} + +// CHECK: void add_kernel_4_grad(int *out, int *in, int *_d_out, int *_d_in) { +//CHECK-NEXT: bool _cond0; +//CHECK-NEXT: int _d_sum = 0; +//CHECK-NEXT: int sum = 0; +//CHECK-NEXT: unsigned long _t2; +//CHECK-NEXT: int _d_i = 0; +//CHECK-NEXT: int i = 0; +//CHECK-NEXT: clad::tape _t3 = {}; +//CHECK-NEXT: clad::tape _t4 = {}; +//CHECK-NEXT: int _t5; +//CHECK-NEXT: unsigned int _t1 = blockIdx.x; +//CHECK-NEXT: unsigned int _t0 = blockDim.x; +//CHECK-NEXT: int _d_index = 0; +//CHECK-NEXT: int index0 = threadIdx.x + _t1 * _t0; +//CHECK-NEXT: { +//CHECK-NEXT: _cond0 = index0 < 5; +//CHECK-NEXT: if (_cond0) { +//CHECK-NEXT: sum = 0; +//CHECK-NEXT: _t2 = 0UL; +//CHECK-NEXT: for (i = index0; ; clad::push(_t3, i) , (i += warpSize)) { +//CHECK-NEXT: { +//CHECK-NEXT: if (!(i < 5)) +//CHECK-NEXT: break; +//CHECK-NEXT: } +//CHECK-NEXT: _t2++; +//CHECK-NEXT: clad::push(_t4, sum); +//CHECK-NEXT: sum += in[i]; +//CHECK-NEXT: } +//CHECK-NEXT: _t5 = out[index0]; +//CHECK-NEXT: out[index0] = sum; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT: if (_cond0) { +//CHECK-NEXT: { +//CHECK-NEXT: out[index0] = _t5; +//CHECK-NEXT: int _r_d2 = _d_out[index0]; +//CHECK-NEXT: _d_out[index0] = 0; +//CHECK-NEXT: _d_sum += _r_d2; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: for (;; _t2--) { +//CHECK-NEXT: { +//CHECK-NEXT: if (!_t2) +//CHECK-NEXT: break; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: i = clad::pop(_t3); +//CHECK-NEXT: int _r_d0 = _d_i; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: sum = clad::pop(_t4); +//CHECK-NEXT: int _r_d1 = _d_sum; +//CHECK-NEXT: _d_in[i] += _r_d1; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT: _d_index += _d_i; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT:} + +__global__ void add_kernel_5(int *out, int *in) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < 5) { + int sum = 0; + // Calculate the total number of threads in the grid + int totalThreads = blockDim.x * gridDim.x; + // Each thread sums elements in steps of the total number of threads in the grid + for (int i = index; i < 5; i += totalThreads) { + sum += in[i]; + } + out[index] = sum; + } +} + +// CHECK: void add_kernel_5_grad(int *out, int *in, int *_d_out, int *_d_in) { +//CHECK-NEXT: bool _cond0; +//CHECK-NEXT: int _d_sum = 0; +//CHECK-NEXT: int sum = 0; +//CHECK-NEXT: unsigned int _t2; +//CHECK-NEXT: unsigned int _t3; +//CHECK-NEXT: int _d_totalThreads = 0; +//CHECK-NEXT: int totalThreads = 0; +//CHECK-NEXT: unsigned long _t4; +//CHECK-NEXT: int _d_i = 0; +//CHECK-NEXT: int i = 0; +//CHECK-NEXT: clad::tape _t5 = {}; +//CHECK-NEXT: clad::tape _t6 = {}; +//CHECK-NEXT: int _t7; +//CHECK-NEXT: unsigned int _t1 = blockIdx.x; +//CHECK-NEXT: unsigned int _t0 = blockDim.x; +//CHECK-NEXT: int _d_index = 0; +//CHECK-NEXT: int index0 = threadIdx.x + _t1 * _t0; +//CHECK-NEXT: { +//CHECK-NEXT: _cond0 = index0 < 5; +//CHECK-NEXT: if (_cond0) { +//CHECK-NEXT: sum = 0; +//CHECK-NEXT: _t3 = blockDim.x; +//CHECK-NEXT: _t2 = gridDim.x; +//CHECK-NEXT: totalThreads = _t3 * _t2; +//CHECK-NEXT: _t4 = 0UL; +//CHECK-NEXT: for (i = index0; ; clad::push(_t5, i) , (i += totalThreads)) { +//CHECK-NEXT: { +//CHECK-NEXT: if (!(i < 5)) +//CHECK-NEXT: break; +//CHECK-NEXT: } +//CHECK-NEXT: _t4++; +//CHECK-NEXT: clad::push(_t6, sum); +//CHECK-NEXT: sum += in[i]; +//CHECK-NEXT: } +//CHECK-NEXT: _t7 = out[index0]; +//CHECK-NEXT: out[index0] = sum; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT: if (_cond0) { +//CHECK-NEXT: { +//CHECK-NEXT: out[index0] = _t7; +//CHECK-NEXT: int _r_d2 = _d_out[index0]; +//CHECK-NEXT: _d_out[index0] = 0; +//CHECK-NEXT: _d_sum += _r_d2; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: for (;; _t4--) { +//CHECK-NEXT: { +//CHECK-NEXT: if (!_t4) +//CHECK-NEXT: break; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: i = clad::pop(_t5); +//CHECK-NEXT: int _r_d0 = _d_i; +//CHECK-NEXT: _d_totalThreads += _r_d0; +//CHECK-NEXT: } +//CHECK-NEXT: { +//CHECK-NEXT: sum = clad::pop(_t6); +//CHECK-NEXT: int _r_d1 = _d_sum; +//CHECK-NEXT: _d_in[i] += _r_d1; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT: _d_index += _d_i; +//CHECK-NEXT: } +//CHECK-NEXT: } +//CHECK-NEXT:} + +#define TEST(F, grid, block, shared_mem, use_stream, x, dx, N) \ + { \ + int *fives = (int*)malloc(N * sizeof(int)); \ + for(int i = 0; i < N; i++) { \ + fives[i] = 5; \ + } \ + int *ones = (int*)malloc(N * sizeof(int)); \ + for(int i = 0; i < N; i++) { \ + ones[i] = 1; \ + } \ + cudaMemcpy(x, fives, N * sizeof(int), cudaMemcpyHostToDevice); \ + cudaMemcpy(dx, ones, N * sizeof(int), cudaMemcpyHostToDevice); \ + auto test = clad::gradient(F); \ + if constexpr (use_stream) { \ + cudaStream_t cudaStream; \ + cudaStreamCreate(&cudaStream); \ + test.execute_kernel(grid, block, shared_mem, cudaStream, x, dx); \ + } \ + else { \ + test.execute_kernel(grid, block, x, dx); \ + } \ + cudaDeviceSynchronize(); \ + int *res = (int*)malloc(N * sizeof(int)); \ + cudaMemcpy(res, dx, N * sizeof(int), cudaMemcpyDeviceToHost); \ + for (int i = 0; i < (N - 1); i++) { \ + printf("%d, ", res[i]); \ + } \ + printf("%d\n", res[N-1]); \ + free(fives); \ + free(ones); \ + free(res); \ + } + + +#define TEST_2(F, grid, block, shared_mem, use_stream, args, y, x, dy, dx, N) \ + { \ + int *fives = (int*)malloc(N * sizeof(int)); \ + for(int i = 0; i < N; i++) { \ + fives[i] = 5; \ + } \ + int *zeros = (int*)malloc(N * sizeof(int)); \ + for(int i = 0; i < N; i++) { \ + zeros[i] = 0; \ + } \ + cudaMemcpy(x, fives, N * sizeof(int), cudaMemcpyHostToDevice); \ + cudaMemcpy(y, zeros, N * sizeof(int), cudaMemcpyHostToDevice); \ + cudaMemcpy(dy, fives, N * sizeof(int), cudaMemcpyHostToDevice); \ + cudaMemcpy(dx, zeros, N * sizeof(int), cudaMemcpyHostToDevice); \ + auto test = clad::gradient(F, args); \ + if constexpr (use_stream) { \ + cudaStream_t cudaStream; \ + cudaStreamCreate(&cudaStream); \ + test.execute_kernel(grid, block, shared_mem, cudaStream, y, x, dy, dx); \ + } \ + else { \ + test.execute_kernel(grid, block, y, x, dy, dx); \ + } \ + cudaDeviceSynchronize(); \ + int *res = (int*)malloc(N * sizeof(int)); \ + cudaMemcpy(res, dx, N * sizeof(int), cudaMemcpyDeviceToHost); \ + for (int i = 0; i < (N - 1); i++) { \ + printf("%d, ", res[i]); \ + } \ + printf("%d\n", res[N-1]); \ + free(fives); \ + free(zeros); \ + free(res); \ + } + + int main(void) { - int *a = (int*)malloc(sizeof(int)); - *a = 2; - int *d_a; + int *a, *d_a; + cudaMalloc(&a, sizeof(int)); cudaMalloc(&d_a, sizeof(int)); - cudaMemcpy(d_a, a, sizeof(int), cudaMemcpyHostToDevice); - - int *asquare = (int*)malloc(sizeof(int)); - *asquare = 1; - int *d_square; - cudaMalloc(&d_square, sizeof(int)); - cudaMemcpy(d_square, asquare, sizeof(int), cudaMemcpyHostToDevice); - auto test = clad::gradient(kernel); - dim3 grid(1); - dim3 block(1); - cudaStream_t cudaStream; - cudaStreamCreate(&cudaStream); - test.execute_kernel(grid, block, 0, cudaStream, d_a, d_square); + TEST(kernel, dim3(1), dim3(1), 0, false, a, d_a, 1); // CHECK-EXEC: 10 + TEST(kernel, dim3(1), dim3(1), 0, true, a, d_a, 1); // CHECK-EXEC: 10 - cudaDeviceSynchronize(); + auto error = clad::gradient(fake_kernel); + error.execute_kernel(dim3(1), dim3(1), a, d_a); // CHECK-EXEC: Use execute() for non-global CUDA kernels - cudaMemcpy(asquare, d_square, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(a, d_a, sizeof(int), cudaMemcpyDeviceToHost); - printf("a = %d, a^2 = %d\n", *a, *asquare); // CHECK-EXEC: a = 2, a^2 = 4 + auto test = clad::gradient(kernel); + test.execute(a, d_a); // CHECK-EXEC: Use execute_kernel() for global CUDA kernels - auto error = clad::gradient(fake_kernel); - error.execute_kernel(grid, block, d_a, d_square); // CHECK-EXEC: Use execute() for non-global CUDA kernels + cudaFree(a); + cudaFree(d_a); - test.execute(d_a, d_square); // CHECK-EXEC: Use execute_kernel() for global CUDA kernels - cudaMemset(d_a, 5, 1); // first byte is set to 5 - cudaMemset(d_square, 1, 1); + int *dummy_in, *dummy_out, *d_out, *d_in; + cudaMalloc(&dummy_in, 5 * sizeof(int)); + cudaMalloc(&dummy_out, 5 * sizeof(int)); + cudaMalloc(&d_out, 5 * sizeof(int)); + cudaMalloc(&d_in, 5 * sizeof(int)); - test.execute_kernel(grid, block, d_a, d_square); - cudaDeviceSynchronize(); + TEST_2(add_kernel, dim3(1), dim3(5, 1, 1), 0, false, "in, out", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 5, 5, 5, 5, 5 + TEST_2(add_kernel_2, dim3(1), dim3(5, 1, 1), 0, true, "in, out", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 5, 5, 5, 5, 5 + TEST_2(add_kernel_3, dim3(5, 1, 1), dim3(1), 0, false, "in, out", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 5, 5, 5, 5, 5 + TEST_2(add_kernel_4, dim3(1), dim3(5, 1, 1), 0, false, "in, out", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 5, 5, 5, 5, 5 + TEST_2(add_kernel_5, dim3(2, 1, 1), dim3(1), 0, false, "in, out", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 5, 5, 5, 5, 5 - cudaMemcpy(asquare, d_square, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(a, d_a, sizeof(int), cudaMemcpyDeviceToHost); - printf("a = %d, a^2 = %d\n", *a, *asquare); // CHECK-EXEC: a = 5, a^2 = 10 + cudaFree(dummy_in); + cudaFree(dummy_out); + cudaFree(d_out); + cudaFree(d_in); return 0; } diff --git a/test/FirstDerivative/BuiltinDerivatives.C b/test/FirstDerivative/BuiltinDerivatives.C index e608473bc..0ff3b6e62 100644 --- a/test/FirstDerivative/BuiltinDerivatives.C +++ b/test/FirstDerivative/BuiltinDerivatives.C @@ -248,6 +248,44 @@ double f14(double x) { return __builtin_pow(x, 3); } +double f15(double y, double x) { + return std::atan2(y, x); +} + +//CHECK: {{float|double}} f15_darg0({{float|double}} y, {{float|double}} x) { +//CHECK-NEXT: {{float|double}} _d_y = 1; +//CHECK-NEXT: {{float|double}} _d_x = 0; +//CHECK-NEXT: {{.*}}ValueAndPushforward<{{float|double}}, {{float|double}}> _t0 = {{.*}}atan2_pushforward(y, x, _d_y, _d_x); +//CHECK-NEXT: return _t0.pushforward; +//CHECK-NEXT: } + +//CHECK: {{float|double}} f15_darg1({{float|double}} y, {{float|double}} x) { +//CHECK-NEXT: {{float|double}} _d_y = 0; +//CHECK-NEXT: {{float|double}} _d_x = 1; +//CHECK-NEXT: {{.*}}ValueAndPushforward<{{float|double}}, {{float|double}}> _t0 = {{.*}}atan2_pushforward(y, x, _d_y, _d_x); +//CHECK-NEXT: return _t0.pushforward; +//CHECK-NEXT: } + +void f15_grad(double y, double x, double *_d_y, double *_d_x); +//CHECK: void f15_grad(double y, double x, double *_d_y, double *_d_x) { +//CHECK: { +//CHECK-NEXT: double _r0 = 0{{.*}}; +//CHECK-NEXT: double _r1 = 0{{.*}}; +//CHECK-NEXT: {{.*}}atan2_pullback(y, x, 1, &_r0, &_r1); +//CHECK-NEXT: *_d_y += _r0; +//CHECK-NEXT: *_d_x += _r1; +//CHECK-NEXT: } +//CHECK-NEXT: } + +float f16(float x) { + return std::acos(x); +} +// CHECK: {{float|double}} f16_darg0({{float|double}} x) { +//CHECK-NEXT: {{float|double}} _d_x = 1; +//CHECK-NEXT: {{.*}}ValueAndPushforward<{{float|double}}, {{float|double}}> _t0 = {{.*}}acos_pushforward(x, _d_x); +//CHECK-NEXT: return _t0.pushforward; +//CHECK-NEXT: } + int main () { //expected-no-diagnostics float f_result[2]; double d_result[2]; @@ -326,5 +364,19 @@ int main () { //expected-no-diagnostics auto f14_ddarg0 = clad::differentiate<2>(f14, 0); printf("Result is = %f\n", f14_ddarg0.execute(1)); //CHECK-EXEC: Result is = 6.000000 + auto f15_darg0 = clad::differentiate(f15, 0); + printf("Result is = %f\n", f15_darg0.execute(4, 3)); //CHECK-EXEC: Result is = 0.120000 + + auto f15_darg1 = clad::differentiate(f15, 1); + printf("Result is = %f\n", f15_darg1.execute(4, 3)); //CHECK-EXEC: Result is = -0.160000 + + d_result[0] = d_result[1] = 0; + clad::gradient(f15); + f15_grad(4, 3, &d_result[0], &d_result[1]); + printf("Result is = {%f, %f}\n", d_result[0], d_result[1]); //CHECK-EXEC: Result is = {0.120000, -0.160000} + + auto f16_darg0 = clad::differentiate(f16, 0); + printf("Result is = %f\n", f16_darg0.execute(0.9)); //CHECK-EXEC: Result is = -2.294157 + return 0; } diff --git a/test/ForwardMode/STLCustomDerivatives.C b/test/ForwardMode/STLCustomDerivatives.C index cfb1f7813..7ee45affa 100644 --- a/test/ForwardMode/STLCustomDerivatives.C +++ b/test/ForwardMode/STLCustomDerivatives.C @@ -1,4 +1,4 @@ -// RUN: %cladclang %s -std=c++14 -I%S/../../include -oSTLCustomDerivatives.out | %filecheck %s +// RUN: %cladclang -std=c++14 %s -I%S/../../include -oSTLCustomDerivatives.out | %filecheck %s // RUN: ./STLCustomDerivatives.out | %filecheck_exec %s #include "clad/Differentiator/Differentiator.h" @@ -7,6 +7,7 @@ #include #include #include +#include #include "../TestUtils.h" #include "../PrintOverloads.h" @@ -115,6 +116,201 @@ double fnVec4(double u, double v) { // CHECK-NEXT: return _t1.pushforward * _t4 + _t3 * _t2.pushforward; // CHECK-NEXT: } +double fnVec5(double x, double y) { + std::vector v; + + v.reserve(10); + + double res = x*v.capacity(); + + v.push_back(x); + v.shrink_to_fit(); + res += x*v.capacity(); + + return res; // 11x +} + +// CHECK: double fnVec5_darg0(double x, double y) { +// CHECK-NEXT: double _d_x = 1; +// CHECK-NEXT: double _d_y = 0; +// CHECK-NEXT: std::vector _d_v; +// CHECK-NEXT: std::vector v; +// CHECK-NEXT: {{.*}}reserve_pushforward(&v, 10, &_d_v, 0); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t0 = {{.*}}capacity_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}} &_t1 = _t0.value; +// CHECK-NEXT: double _d_res = _d_x * _t1 + x * _t0.pushforward; +// CHECK-NEXT: double res = x * _t1; +// CHECK-NEXT: {{.*}}push_back_pushforward(&v, x, &_d_v, _d_x); +// CHECK-NEXT: {{.*}}shrink_to_fit_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t2 = {{.*}}capacity_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}} &_t3 = _t2.value; +// CHECK-NEXT: _d_res += _d_x * _t3 + x * _t2.pushforward; +// CHECK-NEXT: res += x * _t3; +// CHECK-NEXT: return _d_res; +// CHECK-NEXT: } + +double fnVec6(double x, double y) { + std::vector v(3, y); + + v.pop_back(); + double res = v.size()*x; // res = 2x + + v.erase(v.begin()); + res += v.size()*x; // res = 3x + + std::vector w; + w = v; + w.clear(); + res += w.size()*x + v.size()*x; // res = 4x + + w.insert(w.end(), 5); + res += w.size()*x; // res = 5x + + w.insert(w.end(), {y, x, y}); + w.insert(w.end(), v.begin(), v.end()); + if (w[0] == 5 && w[1] == y && w[2] == x && w[3] == y && v.back() == w.back()) { // should always be true + res += w[2]; // res = 6x + } + + w.assign(2, y); + res += (w[0] == y && w[1] == y)*x; // res = 7x + + v[0] = x; + w.assign(v.begin(), v.end()); + res += w[0]; // res = 8x; + + w.assign({3*x, 2*x, 4*x}); + res += w[1]; // res = 10x; + + return res; // 10x +} + +// CHECK: double fnVec6_darg0(double x, double y) { +// CHECK-NEXT: double _d_x = 1; +// CHECK-NEXT: double _d_y = 0; +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::vector, ::std::vector > _t0 = {{.*}}constructor_pushforward(clad::ConstructorPushforwardTag >(), 3, y{{.*}}, 0, _d_y{{.*}}); +// CHECK-NEXT: std::vector _d_v(_t0.pushforward); +// CHECK-NEXT: std::vector v(_t0.value); +// CHECK-NEXT: {{.*}}pop_back_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t1 = {{.*}}size_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}} &_t2 = _t1.value; +// CHECK-NEXT: double _d_res = _t1.pushforward * x + _t2 * _d_x; +// CHECK-NEXT: double res = _t2 * x; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t3 = {{.*}}begin_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t4 = {{.*}}erase_pushforward(&v, {{.*}}_t3.value{{.*}}, &_d_v, {{.*}}_t3.pushforward{{.*}}); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t5 = {{.*}}size_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}} &_t6 = _t5.value; +// CHECK-NEXT: _d_res += _t5.pushforward * x + _t6 * _d_x; +// CHECK-NEXT: res += _t6 * x; +// CHECK-NEXT: std::vector _d_w; +// CHECK-NEXT: std::vector w; +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::vector &, ::std::vector &> _t7 = {{.*}}operator_equal_pushforward(&w, v, &_d_w, _d_v); +// CHECK-NEXT: {{.*}}clear_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t8 = {{.*}}size_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}} &_t9 = _t8.value; +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t10 = {{.*}}size_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}} &_t11 = _t10.value; +// CHECK-NEXT: _d_res += _t8.pushforward * x + _t9 * _d_x + _t10.pushforward * x + _t11 * _d_x; +// CHECK-NEXT: res += _t9 * x + _t11 * x; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t12 = {{.*}}end_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t13 = {{.*}}insert_pushforward(&w, {{.*}}_t12.value{{.*}}, 5, &_d_w, {{.*}}_t12.pushforward{{.*}}, 0); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t14 = {{.*}}size_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}} &_t15 = _t14.value; +// CHECK-NEXT: _d_res += _t14.pushforward * x + _t15 * _d_x; +// CHECK-NEXT: res += _t15 * x; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t16 = {{.*}}end_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t17 = {{.*}}insert_pushforward(&w, {{.*}}_t16.value{{.*}}, {y, x, y}, &_d_w, {{.*}}_t16.pushforward{{.*}}, {_d_y, _d_x, _d_y}); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t18 = {{.*}}end_pushforward(&w, &_d_w); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t19 = {{.*}}begin_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t20 = {{.*}}end_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t21 = {{.*}}insert_pushforward(&w, {{.*}}_t18.value{{.*}}, {{.*}}_t19.value{{.*}}, {{.*}}_t20.value{{.*}}, &_d_w, {{.*}}_t18.pushforward{{.*}}, {{.*}}_t19.pushforward{{.*}}, {{.*}}_t20.pushforward{{.*}}); +// CHECK-NEXT: if (w[0] == 5 && w[1] == y && w[2] == x && w[3] == y && v.back() == w.back()) { +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t22 = {{.*}}operator_subscript_pushforward(&w, 2, &_d_w, 0); +// CHECK-NEXT: _d_res += _t22.pushforward; +// CHECK-NEXT: res += _t22.value; +// CHECK-NEXT: } +// CHECK-NEXT: {{.*}}assign_pushforward(&w, 2, y, &_d_w, 0, _d_y); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t23 = {{.*}}operator_subscript_pushforward(&w, 0, &_d_w, 0); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t24 = {{.*}}operator_subscript_pushforward(&w, 1, &_d_w, 0); +// CHECK-NEXT: bool _t25 = ((_t23.value == y) && (_t24.value == y)); +// CHECK-NEXT: _d_res += false * x + _t25 * _d_x; +// CHECK-NEXT: res += _t25 * x; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t26 = {{.*}}operator_subscript_pushforward(&v, 0, &_d_v, 0); +// CHECK-NEXT: _t26.pushforward = _d_x; +// CHECK-NEXT: _t26.value = x; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t27 = {{.*}}begin_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t28 = {{.*}}end_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}assign_pushforward(&w, {{.*}}_t27.value{{.*}}, {{.*}}_t28.value{{.*}}, &_d_w, {{.*}}_t27.pushforward{{.*}}, {{.*}}_t28.pushforward{{.*}}); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t29 = {{.*}}operator_subscript_pushforward(&w, 0, &_d_w, 0); +// CHECK-NEXT: _d_res += _t29.pushforward; +// CHECK-NEXT: res += _t29.value; +// CHECK-NEXT: {{.*}}assign_pushforward(&w, {3 * x, 2 * x, 4 * x}, &_d_w, {0 * x + 3 * _d_x, 0 * x + 2 * _d_x, 0 * x + 4 * _d_x}); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t30 = {{.*}}operator_subscript_pushforward(&w, 1, &_d_w, 0); +// CHECK-NEXT: _d_res += _t30.pushforward; +// CHECK-NEXT: res += _t30.value; +// CHECK-NEXT: return _d_res; +// CHECK-NEXT: } + +double fnVec7(double x, double y) { + std::vector v; + for (size_t i = 0; i < 3; ++i) { + float fx = x; + v.push_back(fx); + } + double res = 0; + for (size_t i = 0; i < v.size(); ++i) { + v[i] = i * v.at(i); + res += v.at(i); + } + + const std::vector v2 = v; + + // result is the same as res, that is: 3x + return res + v.front() + v.back() - v[v.size()-1] + v2.at(0) + v2.front() + v2.back() - v2[v2.size()-1]; +} + +// CHECK: double fnVec7_darg0(double x, double y) { +// CHECK-NEXT: double _d_x = 1; +// CHECK-NEXT: double _d_y = 0; +// CHECK-NEXT: std::vector _d_v; +// CHECK-NEXT: std::vector v; +// CHECK-NEXT: { +// CHECK-NEXT: size_t _d_i = 0; +// CHECK-NEXT: for (size_t i = 0; i < 3; ++i) { +// CHECK-NEXT: float _d_fx = _d_x; +// CHECK-NEXT: float fx = x; +// CHECK-NEXT: {{.*}}push_back_pushforward(&v, static_cast(fx), &_d_v, static_cast(_d_fx)); +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: double _d_res = 0; +// CHECK-NEXT: double res = 0; +// CHECK-NEXT: { +// CHECK-NEXT: size_t _d_i = 0; +// CHECK-NEXT: for (size_t i = 0; i < v.size(); ++i) { +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t0 = {{.*}}operator_subscript_pushforward(&v, i, &_d_v, _d_i); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t1 = {{.*}}at_pushforward(&v, i, &_d_v, _d_i); +// CHECK-NEXT: double &_t2 = _t1.value; +// CHECK-NEXT: _t0.pushforward = _d_i * _t2 + i * _t1.pushforward; +// CHECK-NEXT: _t0.value = i * _t2; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t3 = {{.*}}at_pushforward(&v, i, &_d_v, _d_i); +// CHECK-NEXT: _d_res += _t3.pushforward; +// CHECK-NEXT: res += _t3.value; +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: const std::vector _d_v2 = _d_v; +// CHECK-NEXT: const std::vector v2 = v; +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t4 = {{.*}}front_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t5 = {{.*}}back_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t6 = {{.*}}size_pushforward(&v, &_d_v); +// CHECK-NEXT: {{.*}}ValueAndPushforward<{{.*}}> _t7 = {{.*}}operator_subscript_pushforward(&v, _t6.value - 1, &_d_v, _t6.pushforward - 0); +// CHECK-NEXT: {{.*}}ValueAndPushforward _t8 = {{.*}}at_pushforward(&v2, 0, &_d_v2, 0); +// CHECK-NEXT: {{.*}}ValueAndPushforward _t9 = {{.*}}front_pushforward(&v2, &_d_v2); +// CHECK-NEXT: {{.*}}ValueAndPushforward _t10 = {{.*}}back_pushforward(&v2, &_d_v2); +// CHECK-NEXT: {{.*}}ValueAndPushforward< ::std::size_t, ::std::size_t> _t11 = {{.*}}size_pushforward(&v2, &_d_v2); +// CHECK-NEXT: {{.*}}ValueAndPushforward _t12 = {{.*}}operator_subscript_pushforward(&v2, _t11.value - 1, &_d_v2, _t11.pushforward - 0); +// CHECK-NEXT: return _d_res + _t4.pushforward + _t5.pushforward - _t7.pushforward + _t8.pushforward + _t9.pushforward + _t10.pushforward - _t12.pushforward; +// CHECK-NEXT: } + double fnArr1(double x) { std::array a; a.fill(x); @@ -181,18 +377,53 @@ double fnArr2(double x) { //CHECK-NEXT: return (_t0.pushforward * _t3 + _t2 * _t1.pushforward) * _t6 + _t5 * _t4.pushforward; //CHECK-NEXT: } +auto pack(double x) { + return std::make_tuple(x, 2*x, 3*x); +} + +double fnTuple1(double x, double y) { + double u, v = 288*x, w; + + std::tie(u, v, w) = pack(x+y); + + return v; +} // = 2x + 2y + +//CHECK: double fnTuple1_darg0(double x, double y) { +//CHECK-NEXT: double _d_x = 1; +//CHECK-NEXT: double _d_y = 0; +//CHECK-NEXT: double _d_u, _d_v = 0 * x + 288 * _d_x, _d_w; +//CHECK-NEXT: double u, v = 288 * x, w; +//CHECK-NEXT: clad::ValueAndPushforward, tuple > _t0 = clad::custom_derivatives::std::tie_pushforward(u, v, w, _d_u, _d_v, _d_w); +//CHECK-NEXT: clad::ValueAndPushforward<{{.*}}> _t1 = pack_pushforward(x + y, _d_x + _d_y); +//CHECK-NEXT: clad::ValueAndPushforward<{{.*}}> _t2 = clad::custom_derivatives::class_functions::operator_equal_pushforward(&_t0.value, static_cast &&>(_t1.value), &_t0.pushforward, static_cast &&>(_t1.pushforward)); +//CHECK-NEXT: return _d_v; +//CHECK-NEXT: } +//CHECK: clad::ValueAndPushforward<{{.*}}> pack_pushforward({{.*}}) { +//CHECK-NEXT: clad::ValueAndPushforward, tuple > _t0 = clad::custom_derivatives::std::make_tuple_pushforward(x, 2 * x, 3 * x, _d_x, 0 * x + 2 * _d_x, 0 * x + 3 * _d_x); +//CHECK-NEXT: return {_t0.value, _t0.pushforward}; +//CHECK-NEXT: } + int main() { INIT_DIFFERENTIATE(fnVec1, "u"); INIT_DIFFERENTIATE(fnVec2, "u"); INIT_DIFFERENTIATE(fnVec3, "u"); INIT_DIFFERENTIATE(fnVec4, "u"); + INIT_DIFFERENTIATE(fnVec5, "x"); + INIT_DIFFERENTIATE(fnVec6, "x"); + INIT_DIFFERENTIATE(fnVec7, "x"); INIT_DIFFERENTIATE(fnArr1, "x"); INIT_DIFFERENTIATE(fnArr2, "x"); + INIT_DIFFERENTIATE(fnTuple1, "x"); TEST_DIFFERENTIATE(fnVec1, 3, 5); // CHECK-EXEC: {10.00} TEST_DIFFERENTIATE(fnVec2, 3, 5); // CHECK-EXEC: {5.00} TEST_DIFFERENTIATE(fnVec3, 3, 5); // CHECK-EXEC: {2.00} TEST_DIFFERENTIATE(fnVec4, 3, 5); // CHECK-EXEC: {30.00} + TEST_DIFFERENTIATE(fnVec5, 3, 4); // CHECK-EXEC: {11.00} + TEST_DIFFERENTIATE(fnVec6, 3, 4); // CHECK-EXEC: {10.00} + TEST_DIFFERENTIATE(fnVec7, 3, 4); // CHECK-EXEC: {3.00} TEST_DIFFERENTIATE(fnArr1, 3); // CHECK-EXEC: {3.00} TEST_DIFFERENTIATE(fnArr2, 3); // CHECK-EXEC: {108.00} + TEST_DIFFERENTIATE(fnTuple1, 3, 4); // CHECK-EXEC: {2.00} } diff --git a/test/ForwardMode/UserDefinedTypes.C b/test/ForwardMode/UserDefinedTypes.C index e0fca6587..fc7d6c865 100644 --- a/test/ForwardMode/UserDefinedTypes.C +++ b/test/ForwardMode/UserDefinedTypes.C @@ -1,8 +1,6 @@ -// RUN: %cladclang %s -I%S/../../include -oUserDefinedTypes.out | %filecheck %s +// RUN: %cladclang -std=c++14 %s -I%S/../../include -oUserDefinedTypes.out | %filecheck %s // RUN: ./UserDefinedTypes.out | %filecheck_exec %s -// XFAIL: asserts - #include "clad/Differentiator/Differentiator.h" #include "clad/Differentiator/STLBuiltins.h" diff --git a/test/Gradient/STLCustomDerivatives.C b/test/Gradient/STLCustomDerivatives.C index ba89aa34d..adb1981cd 100644 --- a/test/Gradient/STLCustomDerivatives.C +++ b/test/Gradient/STLCustomDerivatives.C @@ -147,6 +147,43 @@ double fn18(double x, double y) { return a[1]; } +double fn19(double x, double y) { + std::vector v; + for (size_t i = 0; i < 3; ++i) { + v.push_back(x); + } + double res = 0; + for (size_t i = 0; i < v.size(); ++i) { + res += v.at(i); + } + + v.assign(3, 0); + v.assign(2, y); + + return res + v[0] + v[1] + v[2]; // 3x+2y +} + +double fn20(double x, double y) { + std::vector v; + + v.reserve(10); + + double res = x*v.capacity(); + + v.push_back(x); + v.shrink_to_fit(); + res += y*v.capacity() + x*v.size(); + + return res; // 11x+y +} + +double fn21(double x, double y) { + std::vector a; + a.push_back(0); + a[0] = x*x; + return a[0]; +} + int main() { double d_i, d_j; INIT_GRADIENT(fn10); @@ -158,6 +195,9 @@ int main() { INIT_GRADIENT(fn16); INIT_GRADIENT(fn17); INIT_GRADIENT(fn18); + INIT_GRADIENT(fn19); + INIT_GRADIENT(fn20); + INIT_GRADIENT(fn21); TEST_GRADIENT(fn10, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j); // CHECK-EXEC: {1.00, 1.00} TEST_GRADIENT(fn11, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j); // CHECK-EXEC: {2.00, 1.00} @@ -168,6 +208,9 @@ int main() { TEST_GRADIENT(fn16, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {108.00, 27.00} TEST_GRADIENT(fn17, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {4.00, 2.00} TEST_GRADIENT(fn18, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {2.00, 0.00} + TEST_GRADIENT(fn19, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {3.00, 2.00} + TEST_GRADIENT(fn20, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {11.00, 1.00} + TEST_GRADIENT(fn21, /*numOfDerivativeArgs=*/2, 3, 4, &d_i, &d_j); // CHECK-EXEC: {6.00, 0.00} } // CHECK: void fn10_grad(double u, double v, double *_d_u, double *_d_v) { @@ -659,3 +702,181 @@ int main() { // CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t0, 1, 0., &_d_a, &_r0); // CHECK-NEXT: } // CHECK-NEXT: } + +// CHECK: void fn19_grad(double x, double y, double *_d_x, double *_d_y) { +// CHECK-NEXT: size_t _d_i = {{0U|0UL|0}}; +// CHECK-NEXT: size_t i = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}tape _t1 = {}; +// CHECK-NEXT: {{.*}}tape<{{.*}}vector > _t2 = {}; +// CHECK-NEXT: size_t _d_i0 = {{0U|0UL|0}}; +// CHECK-NEXT: size_t i0 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}tape<{{.*}}vector > _t4 = {}; +// CHECK-NEXT: {{.*}}tape _t5 = {}; +// CHECK-NEXT: {{.*}}tape<{{.*}}vector > _t6 = {}; +// CHECK-NEXT: {{.*}}vector _d_v({}); +// CHECK-NEXT: {{.*}}vector v; +// CHECK-NEXT: {{.*}} _t0 = {{0U|0UL|0}}; +// CHECK-NEXT: for (i = 0; ; ++i) { +// CHECK-NEXT: { +// CHECK-NEXT: if (!(i < 3)) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: _t0++; +// CHECK-NEXT: {{.*}}push(_t1, x); +// CHECK-NEXT: {{.*}}push(_t2, v); +// CHECK-NEXT: {{.*}}push_back_reverse_forw(&v, x, &_d_v, *_d_x); +// CHECK-NEXT: } +// CHECK-NEXT: double _d_res = 0.; +// CHECK-NEXT: double res = 0; +// CHECK-NEXT: {{.*}} _t3 = {{0U|0UL|0}}; +// CHECK-NEXT: for (i0 = 0; ; ++i0) { +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}push(_t4, v); +// CHECK-NEXT: } +// CHECK-NEXT: if (!(i0 < v.size())) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: _t3++; +// CHECK-NEXT: {{.*}}push(_t5, res); +// CHECK-NEXT: {{.*}}push(_t6, v); +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t7 = {{.*}}at_reverse_forw(&v, i0, &_d_v, _r0); +// CHECK-NEXT: res += _t7.value; +// CHECK-NEXT: } +// CHECK-NEXT: {{.*}}vector _t8 = v; +// CHECK-NEXT: v.assign(3, 0); +// CHECK-NEXT: double _t9 = y; +// CHECK-NEXT: {{.*}}vector _t10 = v; +// CHECK-NEXT: v.assign(2, y); +// CHECK-NEXT: {{.*}}vector _t11 = v; +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t12 = {{.*}}operator_subscript_reverse_forw(&v, 0, &_d_v, _r4); +// CHECK-NEXT: {{.*}}vector _t13 = v; +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t14 = {{.*}}operator_subscript_reverse_forw(&v, 1, &_d_v, _r5); +// CHECK-NEXT: {{.*}}vector _t15 = v; +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t16 = {{.*}}operator_subscript_reverse_forw(&v, 2, &_d_v, _r6); +// CHECK-NEXT: { +// CHECK-NEXT: _d_res += 1; +// CHECK-NEXT: {{.*}}size_type _r4 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t11, 0, 1, &_d_v, &_r4); +// CHECK-NEXT: {{.*}}size_type _r5 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t13, 1, 1, &_d_v, &_r5); +// CHECK-NEXT: {{.*}}size_type _r6 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t15, 2, 1, &_d_v, &_r6); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: y = _t9; +// CHECK-NEXT: {{.*}}size_type _r3 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}assign_pullback(&_t10, 2, _t9, &_d_v, &_r3, &*_d_y); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}size_type _r1 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}value_type _r2 = 0.; +// CHECK-NEXT: {{.*}}assign_pullback(&_t8, 3, 0, &_d_v, &_r1, &_r2); +// CHECK-NEXT: } +// CHECK-NEXT: for (;; _t3--) { +// CHECK-NEXT: { +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}size_pullback(&{{.*}}back(_t4), &_d_v); +// CHECK-NEXT: {{.*}}pop(_t4); +// CHECK-NEXT: } +// CHECK-NEXT: if (!_t3) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: --i0; +// CHECK-NEXT: { +// CHECK-NEXT: res = {{.*}}pop(_t5); +// CHECK-NEXT: double _r_d0 = _d_res; +// CHECK-NEXT: size_t _r0 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}at_pullback(&{{.*}}back(_t6), i0, _r_d0, &_d_v, &_r0); +// CHECK-NEXT: _d_i0 += _r0; +// CHECK-NEXT: {{.*}}pop(_t6); +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: for (;; _t0--) { +// CHECK-NEXT: { +// CHECK-NEXT: if (!_t0) +// CHECK-NEXT: break; +// CHECK-NEXT: } +// CHECK-NEXT: --i; +// CHECK-NEXT: { +// CHECK-NEXT: x = {{.*}}back(_t1); +// CHECK-NEXT: {{.*}}push_back_pullback(&{{.*}}back(_t2), {{.*}}back(_t1), &_d_v, &*_d_x); +// CHECK-NEXT: {{.*}}pop(_t1); +// CHECK-NEXT: {{.*}}pop(_t2); +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } + +// CHECK: void fn20_grad(double x, double y, double *_d_x, double *_d_y) { +// CHECK-NEXT: {{.*}}vector _d_v({}); +// CHECK-NEXT: {{.*}}vector v; +// CHECK-NEXT: {{.*}}vector _t0 = v; +// CHECK-NEXT: v.reserve(10); +// CHECK-NEXT: {{.*}}vector _t2 = v; +// CHECK-NEXT: double _t1 = v.capacity(); +// CHECK-NEXT: double _d_res = 0.; +// CHECK-NEXT: double res = x * _t1; +// CHECK-NEXT: double _t3 = x; +// CHECK-NEXT: {{.*}}vector _t4 = v; +// CHECK-NEXT: {{.*}}push_back_reverse_forw(&v, x, &_d_v, *_d_x); +// CHECK-NEXT: {{.*}}vector _t5 = v; +// CHECK-NEXT: v.shrink_to_fit(); +// CHECK-NEXT: double _t6 = res; +// CHECK-NEXT: {{.*}}vector _t8 = v; +// CHECK-NEXT: double _t7 = v.capacity(); +// CHECK-NEXT: {{.*}}vector _t10 = v; +// CHECK-NEXT: double _t9 = v.size(); +// CHECK-NEXT: res += y * _t7 + x * _t9; +// CHECK-NEXT: _d_res += 1; +// CHECK-NEXT: { +// CHECK-NEXT: res = _t6; +// CHECK-NEXT: double _r_d0 = _d_res; +// CHECK-NEXT: *_d_y += _r_d0 * _t7; +// CHECK-NEXT: {{.*}}capacity_pullback(&_t8, y * _r_d0, &_d_v); +// CHECK-NEXT: *_d_x += _r_d0 * _t9; +// CHECK-NEXT: {{.*}}size_pullback(&_t10, x * _r_d0, &_d_v); +// CHECK-NEXT: } +// CHECK-NEXT: {{.*}}shrink_to_fit_pullback(&_t5, &_d_v); +// CHECK-NEXT: { +// CHECK-NEXT: x = _t3; +// CHECK-NEXT: {{.*}}push_back_pullback(&_t4, _t3, &_d_v, &*_d_x); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: *_d_x += _d_res * _t1; +// CHECK-NEXT: {{.*}}capacity_pullback(&_t2, x * _d_res, &_d_v); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}size_type _r0 = {{0U|0UL|0}}; +// CHECK-NEXT: {{.*}}reserve_pullback(&_t0, 10, &_d_v, &_r0); +// CHECK-NEXT: } +// CHECK-NEXT: } + +// CHECK: void fn21_grad(double x, double y, double *_d_x, double *_d_y) { +// CHECK-NEXT: std::vector _d_a({}); +// CHECK-NEXT: std::vector a; +// CHECK-NEXT: std::vector _t0 = a; +// CHECK-NEXT: {{.*}}push_back_reverse_forw(&a, 0{{.*}}, &_d_a, _r0); +// CHECK-NEXT: std::vector _t1 = a; +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t2 = {{.*}}operator_subscript_reverse_forw(&a, 0, &_d_a, _r1); +// CHECK-NEXT: double _t3 = _t2.value; +// CHECK-NEXT: _t2.value = x * x; +// CHECK-NEXT: std::vector _t4 = a; +// CHECK-NEXT: {{.*}}ValueAndAdjoint _t5 = {{.*}}operator_subscript_reverse_forw(&a, 0, &_d_a, _r2); +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}size_type _r2 = 0{{.*}}; +// CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t4, 0, 1, &_d_a, &_r2); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: _t2.value = _t3; +// CHECK-NEXT: double _r_d0 = _t2.adjoint; +// CHECK-NEXT: _t2.adjoint = 0{{.*}}; +// CHECK-NEXT: *_d_x += _r_d0 * x; +// CHECK-NEXT: *_d_x += x * _r_d0; +// CHECK-NEXT: {{.*}}size_type _r1 = 0{{.*}}; +// CHECK-NEXT: {{.*}}operator_subscript_pullback(&_t1, 0, 0{{.*}}, &_d_a, &_r1); +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK-NEXT: {{.*}}value_type _r0 = 0.; +// CHECK-NEXT: {{.*}}push_back_pullback(&_t0, 0{{.*}}, &_d_a, &_r0); +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/test/lit.cfg b/test/lit.cfg index 484a84e48..afe312970 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -263,7 +263,7 @@ flags = ' -Xclang -add-plugin -Xclang clad -Xclang \ config.substitutions.append( ('%cladclang_cuda', config.clang + ' -std=c++17' + flags) ) -config.substitutions.append( ('%cladclang', config.clang + '++ -DCLAD_NO_NUM_DIFF ' + ' -std=c++11' + flags) ) +config.substitutions.append( ('%cladclang', config.clang + '++ -DCLAD_NO_NUM_DIFF ' + ' -std=c++14' + flags) ) config.substitutions.append( ('%cladlib', config.cladlib) ) diff --git a/tools/ClangPlugin.cpp b/tools/ClangPlugin.cpp index 19a5e7d1b..6a3ce81c0 100644 --- a/tools/ClangPlugin.cpp +++ b/tools/ClangPlugin.cpp @@ -90,6 +90,12 @@ namespace clad { CGOpts.PassPlugins.push_back(CladSoPath.str()); } #endif // CLANG_VERSION_MAJOR > 8 + + // Add define for __CLAD_SO_LOADED, so that CladFunction::CladFunction() + // doesn't throw an error. + auto predefines = m_CI.getPreprocessor().getPredefines(); + predefines.append("#define __CLAD_SO_LOADED 1\n"); + m_CI.getPreprocessor().setPredefines(predefines); } CladPlugin::~CladPlugin() {}