From 331f6ab8f6388d2c9f421a78d699d424e171802e Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Mon, 18 Feb 2019 08:23:57 +0100 Subject: [PATCH 01/28] ROCM for FFT calculations --- src/Band/diag_pseudo_potential.hpp | 2 +- src/Beta_projectors/beta_projectors_base.hpp | 6 +- src/Kernels/add_pw_ekin.cu | 1 + src/Kernels/create_beta_gk.cu | 2 + src/Kernels/cuda_uspp_kernels.cu | 1 + src/Kernels/density_rg.cu | 1 + src/Kernels/generate_dm_pw.cu | 1 + src/Kernels/generate_phase_factors.cu | 1 + src/Kernels/mul_by_veff.cu | 1 + src/Kernels/mul_veff_with_phase_factors.cu | 1 + src/Kernels/residuals_aux.cu | 1 + src/Kernels/spline.cu | 1 + src/Kernels/sum_q_pw_dm_pw.cu | 2 + src/SDDK/GPU/acc.hpp | 16 ++ src/SDDK/GPU/checksum.cu | 1 + src/SDDK/GPU/cuda_common.hpp | 99 ---------- src/SDDK/GPU/cuda_timer.hpp | 127 +++++++++++++ src/SDDK/GPU/fft_kernels.cu | 176 +++++++++--------- src/SDDK/GPU/fft_kernels.hip.cpp | 2 + src/SDDK/GPU/rocfft_interface.cpp | 185 +++++++++++++++++++ src/SDDK/GPU/rocfft_interface.hpp | 60 ++++++ src/SDDK/GPU/scale_matrix.cu | 1 + src/SDDK/fft3d.hpp | 154 +++++++-------- src/SDDK/linalg.hpp | 26 ++- 24 files changed, 582 insertions(+), 286 deletions(-) create mode 100644 src/SDDK/GPU/cuda_timer.hpp create mode 100644 src/SDDK/GPU/fft_kernels.hip.cpp create mode 100644 src/SDDK/GPU/rocfft_interface.cpp create mode 100644 src/SDDK/GPU/rocfft_interface.hpp diff --git a/src/Band/diag_pseudo_potential.hpp b/src/Band/diag_pseudo_potential.hpp index f22128d9d..f9bd43dcb 100644 --- a/src/Band/diag_pseudo_potential.hpp +++ b/src/Band/diag_pseudo_potential.hpp @@ -22,7 +22,7 @@ * \brief Diagonalization of pseudopotential Hamiltonian. 
*/ -#ifdef __GPU +#if defined(__GPU) && defined(__CUDA) extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, diff --git a/src/Beta_projectors/beta_projectors_base.hpp b/src/Beta_projectors/beta_projectors_base.hpp index 9cb58f116..0d0ecf895 100644 --- a/src/Beta_projectors/beta_projectors_base.hpp +++ b/src/Beta_projectors/beta_projectors_base.hpp @@ -28,7 +28,7 @@ namespace sirius { -#ifdef __GPU +#if defined(__GPU) && defined(__CUDA) extern "C" void create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, @@ -322,7 +322,7 @@ class Beta_projectors_base break; } case device_t::GPU: { -#ifdef __GPU +#if defined(__GPU) && defined(__CUDA) auto& desc = chunk(ichunk__).desc_; create_beta_gk_gpu(chunk(ichunk__).num_atoms_, num_gkvec_loc(), @@ -331,6 +331,8 @@ class Beta_projectors_base gkvec_coord_.at(memory_t::device), chunk(ichunk__).atom_pos_.at(memory_t::device), pw_coeffs_a().at(memory_t::device)); +#else + throw std::runtime_error("create_beta_gk_gpu() not implemented for non-CUDA devices!"); #endif /* wave-functions are on CPU but the beta-projectors are on GPU */ if (gkvec_.comm().rank() == 0 && is_host_memory(ctx_.preferred_memory_t())) { diff --git a/src/Kernels/add_pw_ekin.cu b/src/Kernels/add_pw_ekin.cu index 2a48c3b76..76a3b75d9 100644 --- a/src/Kernels/add_pw_ekin.cu +++ b/src/Kernels/add_pw_ekin.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include __global__ void add_pw_ekin_gpu_kernel(int num_gvec__, double alpha__, diff --git a/src/Kernels/create_beta_gk.cu b/src/Kernels/create_beta_gk.cu index bebd7e509..f26c1f2c5 100644 --- a/src/Kernels/create_beta_gk.cu +++ b/src/Kernels/create_beta_gk.cu @@ -23,6 +23,8 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include "../SDDK/GPU/cuda_timer.hpp" +#include __global__ void create_beta_gk_gpu_kernel ( diff --git a/src/Kernels/cuda_uspp_kernels.cu b/src/Kernels/cuda_uspp_kernels.cu index 0ecc20719..bcd24a9ae 100644 --- a/src/Kernels/cuda_uspp_kernels.cu 
+++ b/src/Kernels/cuda_uspp_kernels.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include extern cudaStream_t* streams; extern "C" void* cuda_malloc(size_t size); diff --git a/src/Kernels/density_rg.cu b/src/Kernels/density_rg.cu index 632d10a91..e23158f3c 100644 --- a/src/Kernels/density_rg.cu +++ b/src/Kernels/density_rg.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include __global__ void update_density_rg_1_gpu_kernel(int size__, cuDoubleComplex const* psi_rg__, diff --git a/src/Kernels/generate_dm_pw.cu b/src/Kernels/generate_dm_pw.cu index 71ff0bcf5..ff006ca9b 100644 --- a/src/Kernels/generate_dm_pw.cu +++ b/src/Kernels/generate_dm_pw.cu @@ -25,6 +25,7 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" #include "../SDDK/GPU/cublas.hpp" +#include __global__ void generate_phase_factors_conj_gpu_kernel ( diff --git a/src/Kernels/generate_phase_factors.cu b/src/Kernels/generate_phase_factors.cu index f7821f798..2c5caa696 100644 --- a/src/Kernels/generate_phase_factors.cu +++ b/src/Kernels/generate_phase_factors.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include __global__ void generate_phase_factors_gpu_kernel ( diff --git a/src/Kernels/mul_by_veff.cu b/src/Kernels/mul_by_veff.cu index d2999173f..8d59fe60d 100644 --- a/src/Kernels/mul_by_veff.cu +++ b/src/Kernels/mul_by_veff.cu @@ -24,6 +24,7 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" +#include __global__ void mul_by_veff0_gpu_kernel(int size__, double* const* veff__, diff --git a/src/Kernels/mul_veff_with_phase_factors.cu b/src/Kernels/mul_veff_with_phase_factors.cu index afca42512..e6d62fcb4 100644 --- a/src/Kernels/mul_veff_with_phase_factors.cu +++ b/src/Kernels/mul_veff_with_phase_factors.cu @@ -24,6 +24,7 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" +#include __global__ void mul_veff_with_phase_factors_gpu_kernel(int num_gvec_loc__, cuDoubleComplex const* veff__, diff 
--git a/src/Kernels/residuals_aux.cu b/src/Kernels/residuals_aux.cu index 98b6cff17..0e6358499 100644 --- a/src/Kernels/residuals_aux.cu +++ b/src/Kernels/residuals_aux.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include __global__ void compute_residuals_gpu_kernel ( diff --git a/src/Kernels/spline.cu b/src/Kernels/spline.cu index 6b57facac..cb9d5eec0 100644 --- a/src/Kernels/spline.cu +++ b/src/Kernels/spline.cu @@ -23,6 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include __global__ void spline_inner_product_gpu_kernel_v3(int num_points__, int const* idx_ri__, diff --git a/src/Kernels/sum_q_pw_dm_pw.cu b/src/Kernels/sum_q_pw_dm_pw.cu index 077332731..3fc8ffce4 100644 --- a/src/Kernels/sum_q_pw_dm_pw.cu +++ b/src/Kernels/sum_q_pw_dm_pw.cu @@ -24,6 +24,8 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" +#include "../SDDK/GPU/cuda_timer.hpp" +#include __global__ void sum_q_pw_dm_pw_gpu_kernel ( diff --git a/src/SDDK/GPU/acc.hpp b/src/SDDK/GPU/acc.hpp index 2937525cf..028ea2559 100644 --- a/src/SDDK/GPU/acc.hpp +++ b/src/SDDK/GPU/acc.hpp @@ -481,6 +481,22 @@ extern "C" void scale_matrix_elements_gpu(std::complex* ptr__, int nrow__, int ncol__, double beta__); +#elif defined(__ROCM) +inline void scale_matrix_columns_gpu(int nrow, int ncol, void* mtrx, double* a) { + throw std::runtime_error("Not implemented for ROCM!"); +} + +inline void scale_matrix_rows_gpu(int nrow, int ncol, void* mtrx, double const* v) { + throw std::runtime_error("Not implemented for ROCM!"); +} + +inline void scale_matrix_elements_gpu(std::complex* ptr__, + int ld__, + int nrow__, + int ncol__, + double beta__) { + throw std::runtime_error("Not implemented for ROCM!"); +} #endif diff --git a/src/SDDK/GPU/checksum.cu b/src/SDDK/GPU/checksum.cu index ce16538a0..2b65d8ddf 100644 --- a/src/SDDK/GPU/checksum.cu +++ b/src/SDDK/GPU/checksum.cu @@ -23,6 +23,7 @@ */ #include "cuda_common.hpp" +#include __global__ void 
double_complex_checksum_gpu_kernel ( diff --git a/src/SDDK/GPU/cuda_common.hpp b/src/SDDK/GPU/cuda_common.hpp index e527d44e7..c3c461a42 100644 --- a/src/SDDK/GPU/cuda_common.hpp +++ b/src/SDDK/GPU/cuda_common.hpp @@ -25,15 +25,6 @@ #ifndef __CUDA_COMMON_HPP__ #define __CUDA_COMMON_HPP__ -#include -#include -#include -#include -#include -#include -#include -#include - const double twopi = 6.2831853071795864769; inline __device__ size_t array2D_offset(int i0, int i1, int ld0) @@ -56,95 +47,5 @@ inline __host__ __device__ int num_blocks(int length, int block_size) return (length / block_size) + ((length % block_size) ? 1 : 0); } -class CUDA_timers_wrapper -{ - private: - - std::map > cuda_timers_; - - public: - - void add_measurment(std::string const& label, float value) - { - cuda_timers_[label].push_back(value / 1000); - } - - void print() - { - printf("\n"); - printf("CUDA timers \n"); - for (int i = 0; i < 115; i++) printf("-"); - printf("\n"); - printf("name count total min max average\n"); - for (int i = 0; i < 115; i++) printf("-"); - printf("\n"); - - std::map >::iterator it; - for (it = cuda_timers_.begin(); it != cuda_timers_.end(); it++) { - int count = (int)it->second.size(); - double total = 0.0; - float minval = 1e10; - float maxval = 0.0; - for (int i = 0; i < count; i++) { - total += it->second[i]; - minval = std::min(minval, it->second[i]); - maxval = std::max(maxval, it->second[i]); - } - double average = (count == 0) ? 
0.0 : total / count; - if (count == 0) { - minval = 0.0; - } - - printf("%-60s : %5i %10.4f %10.4f %10.4f %10.4f\n", it->first.c_str(), count, total, minval, maxval, average); - } - } -}; - -class CUDA_timer -{ - private: - - cudaEvent_t e_start_; - cudaEvent_t e_stop_; - bool active_; - std::string label_; - - void start() - { - cudaEventCreate(&e_start_); - cudaEventCreate(&e_stop_); - cudaEventRecord(e_start_, 0); - } - - void stop() - { - float time; - cudaEventRecord(e_stop_, 0); - cudaEventSynchronize(e_stop_); - cudaEventElapsedTime(&time, e_start_, e_stop_); - cudaEventDestroy(e_start_); - cudaEventDestroy(e_stop_); - cuda_timers_wrapper().add_measurment(label_, time); - active_ = false; - } - - public: - - CUDA_timer(std::string const& label__) : label_(label__), active_(false) - { - start(); - } - - ~CUDA_timer() - { - stop(); - } - - static CUDA_timers_wrapper& cuda_timers_wrapper() - { - static CUDA_timers_wrapper cuda_timers_wrapper_; - return cuda_timers_wrapper_; - } -}; #endif diff --git a/src/SDDK/GPU/cuda_timer.hpp b/src/SDDK/GPU/cuda_timer.hpp new file mode 100644 index 000000000..90876b4a3 --- /dev/null +++ b/src/SDDK/GPU/cuda_timer.hpp @@ -0,0 +1,127 @@ + +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file cuda_timer.hpp + * + * \brief Timer for CUDA kernels. + */ + +#ifndef __CUDA_TIMER_HPP__ +#define __CUDA_TIMER_HPP__ +#include +#include +#include +#include +#include +#include +#include + +class CUDA_timers_wrapper +{ + private: + + std::map > cuda_timers_; + + public: + + void add_measurment(std::string const& label, float value) + { + cuda_timers_[label].push_back(value / 1000); + } + + void print() + { + printf("\n"); + printf("CUDA timers \n"); + for (int i = 0; i < 115; i++) printf("-"); + printf("\n"); + printf("name count total min max average\n"); + for (int i = 0; i < 115; i++) printf("-"); + printf("\n"); + + std::map >::iterator it; + for (it = cuda_timers_.begin(); it != cuda_timers_.end(); it++) { + int count = (int)it->second.size(); + double total = 0.0; + float minval = 1e10; + float maxval = 0.0; + for (int i = 0; i < count; i++) { + total += it->second[i]; + minval = std::min(minval, it->second[i]); + maxval = std::max(maxval, it->second[i]); + } + double average = (count == 0) ? 
0.0 : total / count; + if (count == 0) { + minval = 0.0; + } + + printf("%-60s : %5i %10.4f %10.4f %10.4f %10.4f\n", it->first.c_str(), count, total, minval, maxval, average); + } + } +}; + +class CUDA_timer +{ + private: + + cudaEvent_t e_start_; + cudaEvent_t e_stop_; + bool active_; + std::string label_; + + void start() + { + cudaEventCreate(&e_start_); + cudaEventCreate(&e_stop_); + cudaEventRecord(e_start_, 0); + } + + void stop() + { + float time; + cudaEventRecord(e_stop_, 0); + cudaEventSynchronize(e_stop_); + cudaEventElapsedTime(&time, e_start_, e_stop_); + cudaEventDestroy(e_start_); + cudaEventDestroy(e_stop_); + cuda_timers_wrapper().add_measurment(label_, time); + active_ = false; + } + + public: + + CUDA_timer(std::string const& label__) : label_(label__), active_(false) + { + start(); + } + + ~CUDA_timer() + { + stop(); + } + + static CUDA_timers_wrapper& cuda_timers_wrapper() + { + static CUDA_timers_wrapper cuda_timers_wrapper_; + return cuda_timers_wrapper_; + } +}; + +#endif diff --git a/src/SDDK/GPU/fft_kernels.cu b/src/SDDK/GPU/fft_kernels.cu index 5ac1a74bc..7717912b2 100644 --- a/src/SDDK/GPU/fft_kernels.cu +++ b/src/SDDK/GPU/fft_kernels.cu @@ -19,13 +19,15 @@ /** \file fft_kernels.cu * - * \brief Contains implementaiton of CUDA kernels necessary for a FFT driver. + * \brief Contains implementaiton of CUDA and ROCM kernels necessary for a FFT driver. 
*/ #include "acc.hpp" +#include +#include "hip/hip_runtime.h" #include "cuda_common.hpp" -#include +//NOTE: HIP will call the corresponding CUDA function if compiled with CUDA support template @@ -33,17 +35,17 @@ __global__ void repack_z_buffer_gpu_kernel(int size_z, int num_zcol_loc, int const* local_z_offsets, int const* local_z_sizes, - cuDoubleComplex* z_sticks_local, - cuDoubleComplex* a2a_buffer) + double2* z_sticks_local, + double2* a2a_buffer) { - int iz = blockDim.x * blockIdx.x + threadIdx.x; - int izcol = blockIdx.y; - int rank = blockIdx.z; + int iz = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int izcol = hipBlockIdx_y; + int rank = hipBlockIdx_z; int local_zsize = local_z_sizes[rank]; if (iz < local_zsize) { int offs = local_z_offsets[rank]; - if (direction == -11) { + if (direction == -1) { z_sticks_local[offs + iz + izcol * size_z] = a2a_buffer[offs * num_zcol_loc + izcol * local_zsize + iz]; } if (direction == 1) { @@ -59,15 +61,14 @@ extern "C" void repack_z_buffer_gpu(int direction, int zcol_max_size, int const* local_z_offsets, int const* local_z_sizes, - cuDoubleComplex* z_sticks_local, - cuDoubleComplex* a2a_buffer) + double2* z_sticks_local, + double2* a2a_buffer) { dim3 grid_t(64); dim3 grid_b(num_blocks(zcol_max_size, grid_t.x), num_zcol_loc, num_ranks); if (direction == 1) { - repack_z_buffer_gpu_kernel<1> <<>> - ( + hipLaunchKernelGGL((repack_z_buffer_gpu_kernel<1>), dim3(grid_b), dim3(grid_t), 0, 0, size_z, num_zcol_loc, local_z_offsets, @@ -76,8 +77,7 @@ extern "C" void repack_z_buffer_gpu(int direction, a2a_buffer ); } else { - repack_z_buffer_gpu_kernel<-1> <<>> - ( + hipLaunchKernelGGL((repack_z_buffer_gpu_kernel<-1>), dim3(grid_b), dim3(grid_t), 0, 0, size_z, num_zcol_loc, local_z_offsets, @@ -93,11 +93,11 @@ extern "C" void repack_z_buffer_gpu(int direction, __global__ void batch_load_gpu_kernel(int fft_size, int num_pw_components, int const* map, - cuDoubleComplex const* data, - cuDoubleComplex* fft_buffer) + double2 const* 
data, + double2* fft_buffer) { - int i = blockIdx.y; - int idx = blockDim.x * blockIdx.x + threadIdx.x; + int i = hipBlockIdx_y; + int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < num_pw_components) { fft_buffer[array2D_offset(map[idx], i, fft_size)] = data[array2D_offset(idx, i, num_pw_components)]; @@ -108,19 +108,18 @@ extern "C" void batch_load_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - cuDoubleComplex const* data, - cuDoubleComplex* fft_buffer, + double2 const* data, + double2* fft_buffer, int stream_id__) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_pw_components, grid_t.x), num_fft); - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); - acc::zero(fft_buffer, fft_size * num_fft); + hipMemsetAsync(fft_buffer, 0, fft_size * num_fft * sizeof(double2), stream); - batch_load_gpu_kernel <<>> - ( + hipLaunchKernelGGL((batch_load_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, fft_size, num_pw_components, map, @@ -132,18 +131,18 @@ extern "C" void batch_load_gpu(int fft_size, __global__ void batch_unload_gpu_kernel(int fft_size, int num_pw_components, int const* map, - cuDoubleComplex const* fft_buffer, - cuDoubleComplex* data, + double2 const* fft_buffer, + double2* data, double alpha, double beta) { - int i = blockIdx.y; - int idx = blockDim.x * blockIdx.x + threadIdx.x; + int i = hipBlockIdx_y; + int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < num_pw_components) { - cuDoubleComplex z1 = data[array2D_offset(idx, i, num_pw_components)]; - cuDoubleComplex z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; - data[array2D_offset(idx, i, num_pw_components)] = make_cuDoubleComplex(alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y); + double2 z1 = data[array2D_offset(idx, i, num_pw_components)]; + double2 z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; + data[array2D_offset(idx, i, num_pw_components)] 
= double2{alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y}; //data[array2D_offset(idx, i, num_pw_components)] = cuCadd( // cuCmul(make_cuDoubleComplex(alpha, 0), data[array2D_offset(idx, i, num_pw_components)]), @@ -158,8 +157,8 @@ extern "C" void batch_unload_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - cuDoubleComplex const* fft_buffer, - cuDoubleComplex* data, + double2 const* fft_buffer, + double2* data, double alpha, double beta, int stream_id__) @@ -167,14 +166,13 @@ extern "C" void batch_unload_gpu(int fft_size, dim3 grid_t(64); dim3 grid_b(num_blocks(num_pw_components, grid_t.x), num_fft); - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); if (alpha == 0) { - acc::zero(data, num_pw_components); + hipMemsetAsync(data, 0, num_pw_components * sizeof(double2), stream); } - batch_unload_gpu_kernel <<>> - ( + hipLaunchKernelGGL((batch_unload_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, fft_size, num_pw_components, map, @@ -187,30 +185,29 @@ extern "C" void batch_unload_gpu(int fft_size, __global__ void load_x0y0_col_gpu_kernel(int z_col_size, int const* map, - cuDoubleComplex const* data, - cuDoubleComplex* fft_buffer) + double2 const* data, + double2* fft_buffer) { - int idx = blockDim.x * blockIdx.x + threadIdx.x; + int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < z_col_size) { - fft_buffer[map[idx]] = make_cuDoubleComplex(data[idx].x, -data[idx].y); + fft_buffer[map[idx]] = double2{data[idx].x, -data[idx].y}; } } extern "C" void load_x0y0_col_gpu(int z_col_size, int const* map, - cuDoubleComplex const* data, - cuDoubleComplex* fft_buffer, + double2 const* data, + double2* fft_buffer, int stream_id__) { dim3 grid_t(64); dim3 grid_b(num_blocks(z_col_size, grid_t.x)); - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); - 
load_x0y0_col_gpu_kernel <<>> - ( + hipLaunchKernelGGL((load_x0y0_col_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, z_col_size, map, data, @@ -219,23 +216,24 @@ extern "C" void load_x0y0_col_gpu(int z_col_size, } template -__global__ void pack_unpack_z_cols_gpu_kernel(cuDoubleComplex* z_cols_packed__, - cuDoubleComplex* fft_buf__, +__global__ void pack_unpack_z_cols_gpu_kernel(double2* z_cols_packed__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, int num_z_cols__, int const* z_col_pos__) { - int icol = blockIdx.x * blockDim.x + threadIdx.x; - int iz = blockIdx.y; + int icol = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int iz = hipBlockIdx_y; int size_xy = size_x__ * size_y__; if (icol < num_z_cols__) { int ipos = z_col_pos__[icol]; /* load into buffer */ if (direction == 1) { if (conjugate) { - fft_buf__[array2D_offset(ipos, iz, size_xy)] = cuConj(z_cols_packed__[array2D_offset(iz, icol, size_z__)]); + fft_buf__[array2D_offset(ipos, iz, size_xy)].x = z_cols_packed__[array2D_offset(iz, icol, size_z__)].x; + fft_buf__[array2D_offset(ipos, iz, size_xy)].y = -z_cols_packed__[array2D_offset(iz, icol, size_z__)].y; } else { fft_buf__[array2D_offset(ipos, iz, size_xy)] = z_cols_packed__[array2D_offset(iz, icol, size_z__)]; @@ -247,8 +245,8 @@ __global__ void pack_unpack_z_cols_gpu_kernel(cuDoubleComplex* z_cols_packed__, } } -extern "C" void unpack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, - cuDoubleComplex* fft_buf__, +extern "C" void unpack_z_cols_gpu(double2* z_cols_packed__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -257,15 +255,14 @@ extern "C" void unpack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, bool use_reduction__, int stream_id__) { - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - cudaMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * 
size_z__ * sizeof(cuDoubleComplex), stream); + hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(double2), stream); - pack_unpack_z_cols_gpu_kernel<1, false> <<>> - ( + hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed__, fft_buf__, size_x__, @@ -275,8 +272,7 @@ extern "C" void unpack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, z_col_pos__ ); if (use_reduction__) { - pack_unpack_z_cols_gpu_kernel<1, true> <<>> - ( + hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, &z_cols_packed__[size_z__], // skip first column for {-x, -y} coordinates fft_buf__, size_x__, @@ -288,8 +284,8 @@ extern "C" void unpack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, } } -extern "C" void pack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, - cuDoubleComplex* fft_buf__, +extern "C" void pack_z_cols_gpu(double2* z_cols_packed__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -297,13 +293,12 @@ extern "C" void pack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, int const* z_col_pos__, int stream_id__) { - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - pack_unpack_z_cols_gpu_kernel<-1, false> <<>> - ( + hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed__, fft_buf__, size_x__, @@ -315,48 +310,48 @@ extern "C" void pack_z_cols_gpu(cuDoubleComplex* z_cols_packed__, } template -__global__ void pack_unpack_two_z_cols_gpu_kernel(cuDoubleComplex* z_cols_packed1__, - cuDoubleComplex* z_cols_packed2__, - cuDoubleComplex* fft_buf__, +__global__ void pack_unpack_two_z_cols_gpu_kernel(double2* z_cols_packed1__, + double2* z_cols_packed2__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, int num_z_cols__, int const* 
z_col_pos__) { - int icol = blockIdx.x * blockDim.x + threadIdx.x; - int iz = blockIdx.y; + int icol = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int iz = hipBlockIdx_y; int size_xy = size_x__ * size_y__; if (icol < num_z_cols__) { /* load into buffer */ if (direction == 1) { int ipos = z_col_pos__[icol]; - cuDoubleComplex z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; - cuDoubleComplex z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; + double2 z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; + double2 z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; if (conjugate) { /* conj(z1) + I * conj(z2) */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_cuDoubleComplex(z1.x + z2.y, z2.x - z1.y); + fft_buf__[array2D_offset(ipos, iz, size_xy)] = double2{z1.x + z2.y, z2.x - z1.y}; } else { /* z1 + I * z2 */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_cuDoubleComplex(z1.x - z2.y, z1.y + z2.x); + fft_buf__[array2D_offset(ipos, iz, size_xy)] = double2{z1.x - z2.y, z1.y + z2.x}; } } if (direction == -1) { int ipos1 = z_col_pos__[icol]; int ipos2 = z_col_pos__[num_z_cols__ + icol]; - cuDoubleComplex z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; - cuDoubleComplex z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; + double2 z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; + double2 z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; - z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = make_cuDoubleComplex(0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)); - z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = make_cuDoubleComplex(0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)); + z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = double2{0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)}; + z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = double2{0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)}; } } } -extern "C" void unpack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, - cuDoubleComplex* z_cols_packed2__, - 
cuDoubleComplex* fft_buf__, +extern "C" void unpack_z_cols_2_gpu(double2* z_cols_packed1__, + double2* z_cols_packed2__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -364,15 +359,14 @@ extern "C" void unpack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, int const* z_col_pos__, int stream_id__) { - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - cudaMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(cuDoubleComplex), stream); + hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(double2), stream); - pack_unpack_two_z_cols_gpu_kernel<1, false> <<>> - ( + hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed1__, z_cols_packed2__, fft_buf__, @@ -382,8 +376,7 @@ extern "C" void unpack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, num_z_cols__, z_col_pos__ ); - pack_unpack_two_z_cols_gpu_kernel<1, true> <<>> - ( + hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, &z_cols_packed1__[size_z__], // skip first column for {-x, -y} coordinates &z_cols_packed2__[size_z__], // skip first column for {-x, -y} coordinates fft_buf__, @@ -395,9 +388,9 @@ extern "C" void unpack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, ); } -extern "C" void pack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, - cuDoubleComplex* z_cols_packed2__, - cuDoubleComplex* fft_buf__, +extern "C" void pack_z_cols_2_gpu(double2* z_cols_packed1__, + double2* z_cols_packed2__, + double2* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -405,13 +398,12 @@ extern "C" void pack_z_cols_2_gpu(cuDoubleComplex* z_cols_packed1__, int const* z_col_pos__, int stream_id__) { - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t) 
acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - pack_unpack_two_z_cols_gpu_kernel<-1, false> <<>> - ( + hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed1__, z_cols_packed2__, fft_buf__, diff --git a/src/SDDK/GPU/fft_kernels.hip.cpp b/src/SDDK/GPU/fft_kernels.hip.cpp new file mode 100644 index 000000000..adb876051 --- /dev/null +++ b/src/SDDK/GPU/fft_kernels.hip.cpp @@ -0,0 +1,2 @@ +// dummy file for compilation with hipcc, because compiling .cu files for amd gpus does not work +#include "fft_kernels.cu" diff --git a/src/SDDK/GPU/rocfft_interface.cpp b/src/SDDK/GPU/rocfft_interface.cpp new file mode 100644 index 000000000..2599223a8 --- /dev/null +++ b/src/SDDK/GPU/rocfft_interface.cpp @@ -0,0 +1,185 @@ +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file rocfft_interface.cpp + * + * \brief Implementation of interface to rocFFT related functions. + */ + +#include +#include +#include +#include "rocfft_interface.hpp" +#include "acc.hpp" + +namespace rocfft +{ +#define CALL_ROCFFT(func__, args__) \ + { \ + if ((func__ args__) != rocfft_status_success) { \ + printf("Error in %s at line %i of file %s: ", #func__, __LINE__, __FILE__); \ + exit(-100); \ + } \ + } + +#define CALL_HIP(cmd) \ + { \ + hipError_t error = cmd; \ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \ + __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } + +struct rocfft_handler { + rocfft_plan plan_forward = nullptr; + rocfft_plan plan_backward = nullptr; + rocfft_execution_info info = nullptr; + void* work_buffer = nullptr; + size_t work_size = 0; + hipStream_t stream = 0; +}; + +void initialize() { CALL_ROCFFT(rocfft_setup, ()); } + +void finalize() { CALL_ROCFFT(rocfft_cleanup, ()); } + +void destroy_plan_handle(void* plan) +{ + rocfft_handler* handler = static_cast(plan); + + // destroy both plans and the execution info; work_buffer is only stored by set_work_area() and is not freed here + if (handler->plan_forward != nullptr) CALL_ROCFFT(rocfft_plan_destroy, (handler->plan_forward)); + if (handler->plan_backward != nullptr) + CALL_ROCFFT(rocfft_plan_destroy, (handler->plan_backward)); + if (handler->info != nullptr) CALL_ROCFFT(rocfft_execution_info_destroy, (handler->info)); + + // free handler itself + delete handler; +} + +void* 
create_batch_plan(int rank, int* dims, int* embed, int stride, int dist, int nfft, + bool auto_alloc) +{ + // TODO: check how allocation could be implemented + if (auto_alloc) throw std::runtime_error("Auto allocation for rocfft not implemented!"); + + // check input; embed may be nullptr (get_work_size() passes nullptr), in which case there is nothing to compare + for (size_t i = 0; i < rank; i++) { + if (embed != nullptr && dims[i] > embed[i]) + throw std::runtime_error("Illegal dims or embed parameters for ROCFFT plan creation!"); + } + + rocfft_plan_description desc = nullptr; + + // ROCFFT appears to expect dimension to be ordered in reverse (see hipFFT implementation) + size_t lengths[3] = {1, 1, 1}; + for (size_t i = 0; i < rank; i++) lengths[i] = dims[rank - 1 - i]; + + if (embed != nullptr) { + CALL_ROCFFT(rocfft_plan_description_create, (&desc)); + + size_t strides[3] = {(size_t)stride, 1, 1}; + + size_t nembed_lengths[3] = {1, 1, 1}; + for (size_t i = 0; i < rank; i++) nembed_lengths[i] = embed[rank - 1 - i]; + + for (size_t i = 1; i < rank; i++) strides[i] = nembed_lengths[i - 1] * strides[i - 1]; + + CALL_ROCFFT( + rocfft_plan_description_set_data_layout, + (desc, rocfft_array_type_complex_interleaved, rocfft_array_type_complex_interleaved, 0, + 0, rank, strides, dist, rank, strides, dist)); + } + + rocfft_handler* handler = new rocfft_handler(); + + // create the execution info and one in-place double-precision plan per transform direction + CALL_ROCFFT(rocfft_execution_info_create, (&handler->info)); + + CALL_ROCFFT(rocfft_plan_create, (&handler->plan_forward, rocfft_placement_inplace, + rocfft_transform_type_complex_forward, rocfft_precision_double, + rank, lengths, nfft, desc)); + CALL_ROCFFT(rocfft_plan_create, (&handler->plan_backward, rocfft_placement_inplace, + rocfft_transform_type_complex_inverse, rocfft_precision_double, + rank, lengths, nfft, desc)); + + // description no longer needed + CALL_ROCFFT(rocfft_plan_description_destroy, (desc)); + + // calculate workbuffer size (the larger of the forward and backward requirements is stored) + size_t work_size_forward, work_size_backward; + CALL_ROCFFT(rocfft_plan_get_work_buffer_size, (handler->plan_forward, &work_size_forward)); + 
CALL_ROCFFT(rocfft_plan_get_work_buffer_size, (handler->plan_backward, &work_size_backward)); + handler->work_size = std::max(work_size_forward, work_size_backward); + + return static_cast(handler); +} + +size_t get_work_size(int ndim, int* dims, int nfft) +{ + rocfft_handler* handler = static_cast( + create_batch_plan(ndim, dims, nullptr, 1, dims[0], nfft, false)); + const size_t work_size = handler->work_size; + destroy_plan_handle(handler); + return work_size; +} + +size_t get_work_size(void* plan) +{ + rocfft_handler* handler = static_cast(plan); + return handler->work_size; +} + +void set_work_area(void* plan, void* work_area) +{ + rocfft_handler* handler = static_cast(plan); + handler->work_buffer = work_area; + CALL_ROCFFT(rocfft_execution_info_set_work_buffer, + (handler->info, work_area, handler->work_size)); +} + +void set_stream(void* plan__, stream_id sid__) +{ + CALL_ROCFFT(rocfft_execution_info_set_stream, + (static_cast(plan__)->info, acc::stream(sid__))); +} + +void forward_transform(void* plan, std::complex* fft_buffer) +{ + rocfft_handler* handler = static_cast(plan); + + void* buffer_array[1]; + buffer_array[0] = (void*)fft_buffer; + + CALL_ROCFFT(rocfft_execute, (handler->plan_forward, buffer_array, buffer_array, handler->info)); +} + +void backward_transform(void* plan, std::complex* fft_buffer) +{ + rocfft_handler* handler = static_cast(plan); + + void* buffer_array[1]; + buffer_array[0] = (void*)fft_buffer; + + CALL_ROCFFT(rocfft_execute, + (handler->plan_backward, buffer_array, buffer_array, handler->info)); +} +} // namespace rocfft diff --git a/src/SDDK/GPU/rocfft_interface.hpp b/src/SDDK/GPU/rocfft_interface.hpp new file mode 100644 index 000000000..5fe0edc28 --- /dev/null +++ b/src/SDDK/GPU/rocfft_interface.hpp @@ -0,0 +1,60 @@ +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file rocfft_interface.hpp + * + * \brief Interface to rocFFT related functions. 
+ */ + +#ifndef __ROCFFT_INTERFACE_HPP__ +#define __ROCFFT_INTERFACE_HPP__ +#include +#include "acc.hpp" + +namespace rocfft +{ +void destroy_plan_handle(void* plan); + +// NOTE: creates a new plan for work size calculation; if a plan is available call directly with +// pointer to it for better performance +size_t get_work_size(int ndim, int* dims, int nfft); + +size_t get_work_size(void* plan); + +// embed can be nullptr (stride and dist are then ignored) +void* create_batch_plan(int rank, int* dims, int* embed, int stride, int dist, int nfft, + bool auto_alloc); + +void set_work_area(void* plan, void* work_area); + +void set_stream(void* plan__, stream_id sid__); + +void forward_transform(void* plan, std::complex* fft_buffer); + +void backward_transform(void* plan, std::complex* fft_buffer); + + +// functions for rocFFT library initialization (initialize) and cleanup (finalize) +// NOTE: Source code in ROCM suggests nothing is actually done (empty functions) +void initialize(); + +void finalize(); + +} // namespace rocfft +#endif diff --git a/src/SDDK/GPU/scale_matrix.cu b/src/SDDK/GPU/scale_matrix.cu index 174dfcf64..31ac26e7f 100644 --- a/src/SDDK/GPU/scale_matrix.cu +++ b/src/SDDK/GPU/scale_matrix.cu @@ -22,6 +22,7 @@ * \brief Contains implementaiton of CUDA kernels to scale matrix elements (rows or columns). 
*/ #include "cuda_common.hpp" +#include __global__ void scale_matrix_columns_gpu_kernel ( diff --git a/src/SDDK/fft3d.hpp b/src/SDDK/fft3d.hpp index d193cf0d8..ca8f99945 100644 --- a/src/SDDK/fft3d.hpp +++ b/src/SDDK/fft3d.hpp @@ -29,12 +29,15 @@ #include "geometry3d.hpp" #include "fft3d_grid.hpp" #include "gvec.hpp" + #if defined(__GPU) && defined(__CUDA) #include "GPU/cufft.hpp" #include "GPU/fft_kernels.hpp" -#endif -#if defined(__GPU) && defined(__ROCM) +#define GPUFFT cufft +#elif defined(__GPU) && defined(__ROCM) #include "GPU/rocfft_interface.hpp" +#include "GPU/fft_kernels.hpp" +#define GPUFFT rocfft #endif namespace sddk { @@ -133,23 +136,14 @@ class FFT3D : public FFT3D_grid memory_t a2a_mem_type{memory_t::host}; - /// Handler for the forward accelerator FFT plan for the z-transformation of G-vectors. - void* acc_fft_plan_z_forward_gvec_{nullptr}; - - /// Handler for the forward accelerator FFT plan for the z-transformation of G+k-vectors. - void* acc_fft_plan_z_forward_gkvec_{nullptr}; - - /// Handler for the backward accelerator FFT plan for the z-transformation of G-vectors. - void* acc_fft_plan_z_backward_gvec_{nullptr}; + /// Handler for the accelerator FFT plan for the z-transformation of G-vectors. + void* acc_fft_plan_z_gvec_{nullptr}; - /// Handler for the backward accelerator FFT plan for the z-transformation of G+k-vectors. - void* acc_fft_plan_z_backward_gkvec_{nullptr}; + /// Handler for the accelerator FFT plan for the z-transformation of G+k-vectors. + void* acc_fft_plan_z_gkvec_{nullptr}; - /// Handler for forward accelerator FFT plan for the xy-transformation. - void* acc_fft_plan_xy_forward_{nullptr}; - - /// Handler for backward accelerator FFT plan for the xy-transformation. - void* acc_fft_plan_xy_backward_{nullptr}; + /// Handler for accelerator FFT plan for the xy-transformation. + void* acc_fft_plan_xy_{nullptr}; /// Offsets for z-buffer. 
mdarray z_offsets_; @@ -193,7 +187,7 @@ class FFT3D : public FFT3D_grid /// Initialize z-transformation and get the maximum number of z-columns. inline int init_plan_z(Gvec_partition const& gvp__, int zcol_count_max__, - void** acc_fft_plan_forward__, void** acc_fft_plan_backward__) + void** acc_fft_plan__) { /* check if we need to create a batch cuFFT plan for larger number of z-columns */ if (gvp__.zcol_count_fft() > zcol_count_max__) { @@ -201,27 +195,13 @@ class FFT3D : public FFT3D_grid zcol_count_max__ = gvp__.zcol_count_fft(); switch (pu_) { case device_t::GPU: { - if (*acc_fft_plan_forward__) { -#if defined(__CUDA) - cufft::destroy_plan_handle(*acc_fft_plan_forward__); -#elif defined(__ROCM) - rocfft::destroy_plan_handle(*acc_fft_plan_forward__); - rocfft::destroy_plan_handle(*acc_fft_plan_backward__); -#endif +#if defined(__GPU) + if (*acc_fft_plan__) { + GPUFFT::destroy_plan_handle(*acc_fft_plan__); } -#if defined(__CUDA) || defined(__ROCM) int dim_z[] = {size(2)}; -#endif -#if defined(__CUDA) - *acc_fft_plan_forward__ = cufft::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); - cufft::set_stream(*acc_fft_plan_forward__, stream_id(acc_fft_stream_id_)); - /* in case of CUDA this is an alias */ - *acc_fft_plan_backward__ = *acc_fft_plan_forward__; -#elif defined(__ROCM) - *acc_fft_plan_forward__ = rocfft::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); - *acc_fft_plan_backward__ = rocfft::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); - rocfft::set_stream(*acc_fft_plan_forward__, stream_id(acc_fft_stream_id_)); - rocfft::set_stream(*acc_fft_plan_backward__, stream_id(acc_fft_stream_id_)); + *acc_fft_plan__ = GPUFFT::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); + GPUFFT::set_stream(*acc_fft_plan__, stream_id(acc_fft_stream_id_)); #endif break; } @@ -279,7 +259,6 @@ class FFT3D : public FFT3D_grid if (is_device_memory(mem__)) { utils::timer 
t("sddk::FFT3D::transform_z_serial|gpu"); #if defined(__GPU) -#if defined(__CUDA) switch (direction) { case 1: { /* load all columns into FFT buffer */ @@ -292,7 +271,7 @@ class FFT3D : public FFT3D_grid fft_buffer_aux__.at(memory_t::device), acc_fft_stream_id_); } /* transform all columns */ - cufft::backward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); + GPUFFT::backward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); /* repack from fft_buffer_aux to fft_buffer */ repack_z_buffer_gpu(direction, comm_.size(), size(2), num_zcol_local, max_zloc_size_, @@ -318,7 +297,7 @@ class FFT3D : public FFT3D_grid fft_buffer_.at(memory_t::device)); /* transform all columns */ - cufft::forward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); + GPUFFT::forward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); /* get all columns from FFT buffer */ batch_unload_gpu(gvec_partition_->zcol_count_fft() * size(2), gvec_partition_->gvec_count_fft(), 1, map_gvec_to_fft_buffer_.at(memory_t::device), @@ -331,7 +310,6 @@ class FFT3D : public FFT3D_grid } } acc::sync_stream(stream_id(acc_fft_stream_id_)); -#endif #endif } @@ -512,7 +490,6 @@ class FFT3D : public FFT3D_grid switch (pu_) { case device_t::GPU: { #if defined(__GPU) -#if defined(__CUDA) /* stream #0 will be doing cuFFT */ switch (direction) { case 1: { @@ -522,12 +499,12 @@ class FFT3D : public FFT3D_grid gvec_partition_->gvec().num_zcol(), z_col_pos_.at(memory_t::device), is_reduced, acc_fft_stream_id_); /* stream #0 executes FFT */ - cufft::backward_transform(acc_fft_plan_xy_backward_, fft_buffer_.at(memory_t::device)); + GPUFFT::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); break; } case -1: { /* stream #0 executes FFT */ - cufft::forward_transform(acc_fft_plan_xy_forward_, fft_buffer_.at(memory_t::device)); + GPUFFT::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); /* stream #0 packs z-columns */ 
pack_z_cols_gpu(fft_buffer_aux__.at(memory_t::device), fft_buffer_.at(memory_t::device), size(0), size(1), local_size_z(), @@ -536,7 +513,6 @@ class FFT3D : public FFT3D_grid } } acc::sync_stream(stream_id(acc_fft_stream_id_)); -#endif #endif break; } @@ -605,7 +581,6 @@ class FFT3D : public FFT3D_grid int size_xy = size(0) * size(1); #if defined(__GPU) -#if defined(__CUDA) if (pu_ == device_t::GPU) { /* stream #0 will be doing cuFFT */ switch (direction) { @@ -616,12 +591,12 @@ class FFT3D : public FFT3D_grid fft_buffer_.at(memory_t::device), size(0), size(1), local_size_z(), gvec_partition_->gvec().num_zcol(), z_col_pos_.at(memory_t::device), acc_fft_stream_id_); /* stream #0 executes FFT */ - cufft::backward_transform(acc_fft_plan_xy_backward_, fft_buffer_.at(memory_t::device)); + GPUFFT::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); break; } case -1: { /* stream #0 executes FFT */ - cufft::forward_transform(acc_fft_plan_xy_forward_, fft_buffer_.at(memory_t::device)); + GPUFFT::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); /* stream #0 packs z-columns */ pack_z_cols_2_gpu(fft_buffer_aux1__.at(memory_t::device), fft_buffer_aux2__.at(memory_t::device), @@ -632,7 +607,6 @@ class FFT3D : public FFT3D_grid } acc::sync_stream(stream_id(acc_fft_stream_id_)); } -#endif #endif if (pu_ == device_t::CPU) { @@ -752,6 +726,10 @@ class FFT3D : public FFT3D_grid #if defined(__GPU) if (pu_ == device_t::GPU) { +#if defined(__ROCM) + rocfft::initialize(); +#endif + #if defined(__GPU_DIRECT) #pragma message "=========== GPU direct is enabled ==============" is_gpu_direct_ = true; @@ -760,19 +738,13 @@ class FFT3D : public FFT3D_grid bool auto_alloc{false}; int dim_xy[] = {size(1), size(0)}; -#if defined(__CUDA) /* create plan for xy transform */ - acc_fft_plan_xy_forward_ = cufft::create_batch_plan(2, dim_xy, dim_xy, 1, size(0) * size(1), local_size_z(), + acc_fft_plan_xy_ = GPUFFT::create_batch_plan(2, dim_xy, dim_xy, 1, size(0) 
* size(1), local_size_z(), auto_alloc); /* in CUDA case this is an alias */ - acc_fft_plan_xy_backward_ = acc_fft_plan_xy_forward_; + acc_fft_plan_xy_ = acc_fft_plan_xy_; /* stream #0 will execute FFTs */ - cufft::set_stream(acc_fft_plan_xy_forward_, stream_id(acc_fft_stream_id_)); -#endif -#if defined(__ROCM) - acc_fft_plan_xy_forward_ = rocfft::create_batch_plan(-1, 2, dim_xy, size(0) * size(1), local_size_z(), - auto_alloc); -#endif + GPUFFT::set_stream(acc_fft_plan_xy_, stream_id(acc_fft_stream_id_)); /* allocate arrays with z- offsets and sizes on the host and device*/ z_offsets_ = mdarray(comm_.size()); z_sizes_ = mdarray(comm_.size()); @@ -811,18 +783,15 @@ class FFT3D : public FFT3D_grid } #if defined(__GPU) if (pu_ == device_t::GPU) { -#if defined(__CUDA) - cufft::destroy_plan_handle(acc_fft_plan_xy_forward_); - if (acc_fft_plan_z_forward_gvec_) { - cufft::destroy_plan_handle(acc_fft_plan_z_forward_gvec_); + GPUFFT::destroy_plan_handle(acc_fft_plan_xy_); + if (acc_fft_plan_z_gvec_) { + GPUFFT::destroy_plan_handle(acc_fft_plan_z_gvec_); } - if (acc_fft_plan_z_forward_gkvec_) { - cufft::destroy_plan_handle(acc_fft_plan_z_forward_gkvec_); + if (acc_fft_plan_z_gkvec_) { + GPUFFT::destroy_plan_handle(acc_fft_plan_z_gkvec_); } -#endif #if defined(__ROCM) - rocfft::destroy_plan(acc_fft_plan_xy_forward_); - rocfft::destroy_plan(acc_fft_plan_xy_backward_); + rocfft::finalize(); #endif } #endif @@ -982,11 +951,9 @@ class FFT3D : public FFT3D_grid /* init z-plan for G-vector transformation */ if (gvp__.gvec().bare()) { - zcol_gvec_count_max_ = init_plan_z(gvp__, zcol_gvec_count_max_, &acc_fft_plan_z_forward_gvec_, - &acc_fft_plan_z_backward_gvec_); + zcol_gvec_count_max_ = init_plan_z(gvp__, zcol_gvec_count_max_, &acc_fft_plan_z_gvec_); } else { /* init z-plan for G+k vector transformation */ - zcol_gkvec_count_max_ = init_plan_z(gvp__, zcol_gkvec_count_max_, &acc_fft_plan_z_forward_gkvec_, - &acc_fft_plan_z_backward_gkvec_); + zcol_gkvec_count_max_ = 
init_plan_z(gvp__, zcol_gkvec_count_max_, &acc_fft_plan_z_gkvec_); } reallocate_fft_buffer_aux(fft_buffer_aux1_); reallocate_fft_buffer_aux(fft_buffer_aux2_); @@ -1025,7 +992,7 @@ class FFT3D : public FFT3D_grid } map_gvec_to_fft_buffer_x0y0_.allocate(memory_t::device).copy_to(memory_t::device); } -#if defined(__CUDA) || defined(__ROCM) +#if defined(__GPU) int zcol_count_max{0}; if (gvp__.gvec().bare()) { zcol_count_max = zcol_gvec_count_max_; @@ -1037,20 +1004,25 @@ class FFT3D : public FFT3D_grid int dims_xy[] = {size(1), size(0)}; #endif -#if defined(__CUDA) +#if defined(__GPU) /* maximum worksize of z and xy transforms */ - work_size = std::max(cufft::get_work_size(2, dims_xy, local_size_z()), - cufft::get_work_size(1, dim_z, zcol_count_max)); +#if defined(__CUDA) + work_size = std::max(GPUFFT::get_work_size(2, dims_xy, local_size_z()), + GPUFFT::get_work_size(1, dim_z, zcol_count_max)); +#elif defined(__ROCM) + work_size = std::max(GPUFFT::get_work_size(acc_fft_plan_xy_), + GPUFFT::get_work_size(acc_fft_plan_z_gvec_)); +#endif /* allocate accelerator fft work buffer */ acc_fft_work_buf_ = mdarray(work_size, memory_t::device, "FFT3D.acc_fft_work_buf_"); - /* set work area for cufft */ - cufft::set_work_area(acc_fft_plan_xy_forward_, acc_fft_work_buf_.at(memory_t::device)); + /* set work area for GPUFFT */ + GPUFFT::set_work_area(acc_fft_plan_xy_, acc_fft_work_buf_.at(memory_t::device)); if (gvp__.gvec().bare()) { - cufft::set_work_area(acc_fft_plan_z_forward_gvec_, acc_fft_work_buf_.at(memory_t::device)); + GPUFFT::set_work_area(acc_fft_plan_z_gvec_, acc_fft_work_buf_.at(memory_t::device)); } else { - cufft::set_work_area(acc_fft_plan_z_forward_gkvec_, acc_fft_work_buf_.at(memory_t::device)); + GPUFFT::set_work_area(acc_fft_plan_z_gkvec_, acc_fft_work_buf_.at(memory_t::device)); } #endif fft_buffer_aux1_.allocate(memory_t::device); @@ -1100,9 +1072,9 @@ class FFT3D : public FFT3D_grid switch (direction) { case 1: { if (gvec_partition_->gvec().bare()) { - 
transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_backward_gvec_, mem); + transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_gvec_, mem); } else { - transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_backward_gkvec_, mem); + transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_gkvec_, mem); } transform_xy(fft_buffer_aux1_); break; @@ -1110,9 +1082,9 @@ class FFT3D : public FFT3D_grid case -1: { transform_xy(fft_buffer_aux1_); if (gvec_partition_->gvec().bare()) { - transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_forward_gvec_, mem); + transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_gvec_, mem); } else { - transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_forward_gkvec_, mem); + transform_z(data__, fft_buffer_aux1_, acc_fft_plan_z_gkvec_, mem); } break; } @@ -1139,11 +1111,11 @@ class FFT3D : public FFT3D_grid switch (direction) { case 1: { if (gvec_partition_->gvec().bare()) { - transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_backward_gvec_, mem); - transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_backward_gvec_, mem); + transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_gvec_, mem); + transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_gvec_, mem); } else { - transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_backward_gkvec_, mem); - transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_backward_gkvec_, mem); + transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_gkvec_, mem); + transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_gkvec_, mem); } transform_xy(fft_buffer_aux1_, fft_buffer_aux2_); break; @@ -1151,11 +1123,11 @@ class FFT3D : public FFT3D_grid case -1: { transform_xy(fft_buffer_aux1_, fft_buffer_aux2_); if (gvec_partition_->gvec().bare()) { - transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_forward_gvec_, mem); - transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_forward_gvec_, mem); + transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_gvec_, mem); + transform_z(data2__, fft_buffer_aux2_, 
acc_fft_plan_z_gvec_, mem); } else { - transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_forward_gkvec_, mem); - transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_forward_gkvec_, mem); + transform_z(data1__, fft_buffer_aux1_, acc_fft_plan_z_gkvec_, mem); + transform_z(data2__, fft_buffer_aux2_, acc_fft_plan_z_gkvec_, mem); } break; } diff --git a/src/SDDK/linalg.hpp b/src/SDDK/linalg.hpp index dac97909d..8ca052ba2 100644 --- a/src/SDDK/linalg.hpp +++ b/src/SDDK/linalg.hpp @@ -26,7 +26,7 @@ #define __LINALG_HPP__ #include -#ifdef __GPU +#ifdef __CUDA #include "GPU/cublas.hpp" #endif #ifdef __MAGMA @@ -1184,7 +1184,11 @@ inline void linalg::gemv(int trans__, ftn_int m, ftn_in int stream_id) { const char trans[] = {'N', 'T', 'C'}; +#if defined(__GPU) && defined(__CUDA) cublas::zgemv(trans[trans__], m, n, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)x, incx, (cuDoubleComplex*)beta, (cuDoubleComplex*)y, incy, stream_id); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } // Generic interface to zgemm @@ -1201,7 +1205,11 @@ inline void linalg::gemm(int transa__, int transb__, ft assert(n > 0); assert(k > 0); const char trans[] = {'N', 'T', 'C'}; +#if defined(__GPU) && defined(__CUDA) cublas::zgemm(trans[transa__], trans[transb__], m, n, k, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)B, ldb, (cuDoubleComplex*)beta, (cuDoubleComplex*)C, ldc, stream_id); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } // Generic interface to dgemm @@ -1218,7 +1226,11 @@ inline void linalg::gemm(int transa__, int transb__, ftn_int m, assert(n > 0); assert(k > 0); const char trans[] = {'N', 'T', 'C'}; +#if defined(__GPU) && defined(__CUDA) cublas::dgemm(trans[transa__], trans[transb__], m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream_id); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } template <> @@ -1261,7 +1273,11 @@ inline void linalg::trmm(char side, 
ftn_double* B, ftn_int ldb) { +#if defined(__GPU) && defined(__CUDA) cublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } template <> @@ -1276,7 +1292,11 @@ inline void linalg::trmm(char side, ftn_double_complex* B, ftn_int ldb) { +#if defined(__GPU) && defined(__CUDA) cublas::ztrmm(side, uplo, transa, 'N', m, n, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)B, ldb); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } template <> @@ -1287,7 +1307,11 @@ inline void linalg::axpy(ftn_int n__, ftn_double_complex* y__, ftn_int incy__) { +#if defined(__GPU) && defined(__CUDA) cublas::zaxpy(n__, (cuDoubleComplex const*)alpha__, (cuDoubleComplex*)x__, incx__, (cuDoubleComplex*)y__, incy__); +#else + throw std::runtime_error("not compiled with cublas"); +#endif } #endif // __GPU From d9217b7b01fb61744bfd95b6b2fb279f7539eff3 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Mon, 18 Feb 2019 16:53:46 +0100 Subject: [PATCH 02/28] ported cuda kernels to AMD HIP --- src/Kernels/_update_apw_coeffs.cu_ | 17 +-- src/Kernels/add_pw_ekin.cu | 22 ++-- src/Kernels/add_pw_ekin.hip.cpp | 1 + src/Kernels/create_beta_gk.cu | 23 ++-- src/Kernels/create_beta_gk.hip.cpp | 1 + src/Kernels/cuda_uspp_kernels.cu | 115 +++++++++--------- src/Kernels/cuda_uspp_kernels.hip.cpp | 1 + src/Kernels/density_rg.cu | 25 ++-- src/Kernels/density_rg.hip.cpp | 1 + src/Kernels/generate_dm_pw.cu | 21 ++-- src/Kernels/generate_dm_pw.hip.cpp | 1 + src/Kernels/generate_phase_factors.cu | 12 +- src/Kernels/generate_phase_factors.hip.cpp | 1 + src/Kernels/mul_by_veff.cu | 41 ++++--- src/Kernels/mul_by_veff.hip.cpp | 1 + src/Kernels/mul_veff_with_phase_factors.cu | 20 +-- .../mul_veff_with_phase_factors.hip.cpp | 1 + src/Kernels/random.cu | 1 + src/Kernels/random.hip.cpp | 1 + src/Kernels/residuals_aux.cu | 57 +++++---- src/Kernels/residuals_aux.hip.cpp | 1 + src/Kernels/spline.cu | 8 
+- src/Kernels/spline.hip.cpp | 1 + src/Kernels/sum_q_pw_dm_pw.cu | 21 ++-- src/Kernels/sum_q_pw_dm_pw.hip.cpp | 1 + src/SDDK/GPU/cuda_common.hpp | 4 + 26 files changed, 214 insertions(+), 185 deletions(-) create mode 100644 src/Kernels/add_pw_ekin.hip.cpp create mode 100644 src/Kernels/create_beta_gk.hip.cpp create mode 100644 src/Kernels/cuda_uspp_kernels.hip.cpp create mode 100644 src/Kernels/density_rg.hip.cpp create mode 100644 src/Kernels/generate_dm_pw.hip.cpp create mode 100644 src/Kernels/generate_phase_factors.hip.cpp create mode 100644 src/Kernels/mul_by_veff.hip.cpp create mode 100644 src/Kernels/mul_veff_with_phase_factors.hip.cpp create mode 100644 src/Kernels/random.hip.cpp create mode 100644 src/Kernels/residuals_aux.hip.cpp create mode 100644 src/Kernels/spline.hip.cpp create mode 100644 src/Kernels/sum_q_pw_dm_pw.hip.cpp diff --git a/src/Kernels/_update_apw_coeffs.cu_ b/src/Kernels/_update_apw_coeffs.cu_ index 99872b9cd..ab9fcd73e 100644 --- a/src/Kernels/_update_apw_coeffs.cu_ +++ b/src/Kernels/_update_apw_coeffs.cu_ @@ -1,9 +1,11 @@ +#include "hip/hip_runtime.h" #include "kernels_common.hpp" +#include "hip/hip_complex.h" -__global__ void update_apw_coeffs_gpu_kernel(cuDoubleComplex* apw_coeffs__, +__global__ void update_apw_coeffs_gpu_kernel(hipDoubleComplex* apw_coeffs__, int ld__, - cuDoubleComplex* v__, - cuDoubleComplex* alm__, + hipDoubleComplex* v__, + hipDoubleComplex* alm__, int nrow__) { int icol = blockIdx.y; @@ -14,18 +16,17 @@ __global__ void update_apw_coeffs_gpu_kernel(cuDoubleComplex* apw_coeffs__, } -extern "C" void update_apw_coeffs_gpu(cuDoubleComplex* apw_coeffs__, +extern "C" void update_apw_coeffs_gpu(hipDoubleComplex* apw_coeffs__, int ld__, - cuDoubleComplex* v__, - cuDoubleComplex* alm__, + hipDoubleComplex* v__, + hipDoubleComplex* alm__, int nrow__, int ncol__) { dim3 grid_t(64); dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__); - update_apw_coeffs_gpu_kernel <<>> - ( + 
hipLaunchKernelGGL((update_apw_coeffs_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, ); diff --git a/src/Kernels/add_pw_ekin.cu b/src/Kernels/add_pw_ekin.cu index 76a3b75d9..1860f8da7 100644 --- a/src/Kernels/add_pw_ekin.cu +++ b/src/Kernels/add_pw_ekin.cu @@ -23,20 +23,21 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void add_pw_ekin_gpu_kernel(int num_gvec__, double alpha__, double const* pw_ekin__, - cuDoubleComplex const* phi__, - cuDoubleComplex const* vphi__, - cuDoubleComplex* hphi__) + hipDoubleComplex const* phi__, + hipDoubleComplex const* vphi__, + hipDoubleComplex* hphi__) { int ig = blockIdx.x * blockDim.x + threadIdx.x; if (ig < num_gvec__) { - cuDoubleComplex z1 = cuCadd(vphi__[ig], make_cuDoubleComplex(alpha__ * pw_ekin__[ig] * phi__[ig].x, + hipDoubleComplex z1 = hipCadd(vphi__[ig], make_hipDoubleComplex(alpha__ * pw_ekin__[ig] * phi__[ig].x, alpha__ * pw_ekin__[ig] * phi__[ig].y)); - hphi__[ig] = cuCadd(hphi__[ig], z1); + hphi__[ig] = hipCadd(hphi__[ig], z1); } } @@ -47,15 +48,14 @@ __global__ void add_pw_ekin_gpu_kernel(int num_gvec__, extern "C" void add_pw_ekin_gpu(int num_gvec__, double alpha__, double const* pw_ekin__, - cuDoubleComplex const* phi__, - cuDoubleComplex const* vphi__, - cuDoubleComplex* hphi__) + hipDoubleComplex const* phi__, + hipDoubleComplex const* vphi__, + hipDoubleComplex* hphi__) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_gvec__, grid_t.x)); - add_pw_ekin_gpu_kernel <<>> - ( + hipLaunchKernelGGL((add_pw_ekin_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gvec__, alpha__, pw_ekin__, diff --git a/src/Kernels/add_pw_ekin.hip.cpp b/src/Kernels/add_pw_ekin.hip.cpp new file mode 100644 index 000000000..a30c1a193 --- /dev/null +++ b/src/Kernels/add_pw_ekin.hip.cpp @@ -0,0 +1 @@ +#include "add_pw_ekin.cu" diff --git a/src/Kernels/create_beta_gk.cu b/src/Kernels/create_beta_gk.cu index f26c1f2c5..ed01407d5 100644 --- 
a/src/Kernels/create_beta_gk.cu +++ b/src/Kernels/create_beta_gk.cu @@ -23,17 +23,21 @@ */ #include "../SDDK/GPU/cuda_common.hpp" +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" + +#ifdef __CUDA #include "../SDDK/GPU/cuda_timer.hpp" -#include +#endif __global__ void create_beta_gk_gpu_kernel ( int num_gkvec__, int const* beta_desc__, - cuDoubleComplex const* beta_gk_t, + hipDoubleComplex const* beta_gk_t, double const* gkvec, double const* atom_pos, - cuDoubleComplex* beta_gk + hipDoubleComplex* beta_gk ) { int ia = blockIdx.y; @@ -55,8 +59,8 @@ __global__ void create_beta_gk_gpu_kernel for (int xi = 0; xi < nbf; xi++) { beta_gk[array2D_offset(igk, offset_beta_gk + xi, num_gkvec__)] = - cuCmul(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)], - make_cuDoubleComplex(cosp, -sinp)); + hipCmul(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)], + make_hipDoubleComplex(cosp, -sinp)); } } } @@ -64,18 +68,19 @@ __global__ void create_beta_gk_gpu_kernel extern "C" void create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, - cuDoubleComplex const* beta_gk_t, + hipDoubleComplex const* beta_gk_t, double const* gkvec, double const* atom_pos, - cuDoubleComplex* beta_gk) + hipDoubleComplex* beta_gk) { +#ifdef __CUDA CUDA_timer t("create_beta_gk_gpu"); +#endif dim3 grid_t(64); dim3 grid_b(num_blocks(num_gkvec, grid_t.x), num_atoms); - create_beta_gk_gpu_kernel <<>> - ( + hipLaunchKernelGGL((create_beta_gk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, beta_desc, beta_gk_t, diff --git a/src/Kernels/create_beta_gk.hip.cpp b/src/Kernels/create_beta_gk.hip.cpp new file mode 100644 index 000000000..cb49ea3a5 --- /dev/null +++ b/src/Kernels/create_beta_gk.hip.cpp @@ -0,0 +1 @@ +#include "create_beta_gk.cu" diff --git a/src/Kernels/cuda_uspp_kernels.cu b/src/Kernels/cuda_uspp_kernels.cu index bcd24a9ae..3e38d2758 100644 --- a/src/Kernels/cuda_uspp_kernels.cu +++ b/src/Kernels/cuda_uspp_kernels.cu @@ -23,22 +23,23 
@@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" -extern cudaStream_t* streams; +extern hipStream_t* streams; extern "C" void* cuda_malloc(size_t size); extern "C" void cuda_free(void* ptr); extern "C" void cublas_zgemm(int transa, int transb, int32_t m, int32_t n, int32_t k, - cuDoubleComplex* alpha, cuDoubleComplex* a, int32_t lda, cuDoubleComplex* b, - int32_t ldb, cuDoubleComplex* beta, cuDoubleComplex* c, int32_t ldc, int stream_id); + hipDoubleComplex* alpha, hipDoubleComplex* a, int32_t lda, hipDoubleComplex* b, + int32_t ldb, hipDoubleComplex* beta, hipDoubleComplex* c, int32_t ldc, int stream_id); __global__ void compute_chebyshev_order1_gpu_kernel ( int num_gkvec__, double c__, double r__, - cuDoubleComplex* phi0__, - cuDoubleComplex* phi1__ + hipDoubleComplex* phi0__, + hipDoubleComplex* phi1__ ) { int igk = blockDim.x * blockIdx.x + threadIdx.x; @@ -48,11 +49,11 @@ __global__ void compute_chebyshev_order1_gpu_kernel { int i = array2D_offset(igk, j, num_gkvec__); // phi0 * c - cuDoubleComplex z1 = cuCmul(phi0__[i], make_cuDoubleComplex(c__, 0)); + hipDoubleComplex z1 = hipCmul(phi0__[i], make_hipDoubleComplex(c__, 0)); // phi1 - phi0 * c - cuDoubleComplex z2 = cuCsub(phi1__[i], z1); + hipDoubleComplex z2 = hipCsub(phi1__[i], z1); // (phi1 - phi0 * c) / r - phi1__[i] = cuCdiv(z2, make_cuDoubleComplex(r__, 0)); + phi1__[i] = hipCdiv(z2, make_hipDoubleComplex(r__, 0)); } } @@ -61,9 +62,9 @@ __global__ void compute_chebyshev_orderk_gpu_kernel int num_gkvec__, double c__, double r__, - cuDoubleComplex* phi0__, - cuDoubleComplex* phi1__, - cuDoubleComplex* phi2__ + hipDoubleComplex* phi0__, + hipDoubleComplex* phi1__, + hipDoubleComplex* phi2__ ) { int igk = blockDim.x * blockIdx.x + threadIdx.x; @@ -73,13 +74,13 @@ __global__ void compute_chebyshev_orderk_gpu_kernel { int i = array2D_offset(igk, j, num_gkvec__); // phi1 * c - cuDoubleComplex z1 = cuCmul(phi1__[i], make_cuDoubleComplex(c__, 0)); 
+ hipDoubleComplex z1 = hipCmul(phi1__[i], make_hipDoubleComplex(c__, 0)); // phi2 - phi1 * c - cuDoubleComplex z2 = cuCsub(phi2__[i], z1); + hipDoubleComplex z2 = hipCsub(phi2__[i], z1); // (phi2 - phi1 * c) * 2 / r - cuDoubleComplex z3 = cuCmul(z2, make_cuDoubleComplex(2.0 / r__, 0)); + hipDoubleComplex z3 = hipCmul(z2, make_hipDoubleComplex(2.0 / r__, 0)); // (phi2 - phi1 * c) * 2 / r - phi0 - phi2__[i] = cuCsub(z3, phi0__[i]); + phi2__[i] = hipCsub(z3, phi0__[i]); } } @@ -87,17 +88,16 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, double r, - cuDoubleComplex* phi0, - cuDoubleComplex* phi1, - cuDoubleComplex* phi2) + hipDoubleComplex* phi0, + hipDoubleComplex* phi1, + hipDoubleComplex* phi2) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_gkvec, grid_t.x), n); if (phi2 == NULL) { - compute_chebyshev_order1_gpu_kernel <<>> - ( + hipLaunchKernelGGL((compute_chebyshev_order1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, c, r, @@ -107,8 +107,7 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, } else { - compute_chebyshev_orderk_gpu_kernel <<>> - ( + hipLaunchKernelGGL((compute_chebyshev_orderk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, c, r, @@ -128,9 +127,9 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== int* beta_t_idx, //== double* atom_pos, //== double* gkvec, -//== cuDoubleComplex* beta_pw_type, -//== cuDoubleComplex* phi, -//== cuDoubleComplex* beta_phi) +//== hipDoubleComplex* beta_pw_type, +//== hipDoubleComplex* phi, +//== hipDoubleComplex* beta_phi) //== { //== int idx_beta = blockDim.x * blockIdx.x + threadIdx.x; //== int idx_phi = blockDim.y * blockIdx.y + threadIdx.y; @@ -148,12 +147,12 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== //== int N = num_blocks(num_gkvec, BLOCK_SIZE); //== -//== cuDoubleComplex val = make_cuDoubleComplex(0.0, 0.0); +//== hipDoubleComplex val = make_hipDoubleComplex(0.0, 0.0); //== //== for (int m = 
0; m < N; m++) //== { -//== __shared__ cuDoubleComplex beta_pw_tile[BLOCK_SIZE][BLOCK_SIZE]; -//== __shared__ cuDoubleComplex phi_tile[BLOCK_SIZE][BLOCK_SIZE]; +//== __shared__ hipDoubleComplex beta_pw_tile[BLOCK_SIZE][BLOCK_SIZE]; +//== __shared__ hipDoubleComplex phi_tile[BLOCK_SIZE][BLOCK_SIZE]; //== //== int bs = (m + 1) * BLOCK_SIZE > num_gkvec ? num_gkvec - m * BLOCK_SIZE : BLOCK_SIZE; //== @@ -169,8 +168,8 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== double sinp = sin(p); //== double cosp = cos(p); //== -//== beta_pw_tile[threadIdx.x][threadIdx.y] = cuCmul(cuConj(beta_pw_type[array2D_offset(igk, offset_t, num_gkvec)]), -//== make_cuDoubleComplex(cosp, sinp)); +//== beta_pw_tile[threadIdx.x][threadIdx.y] = hipCmul(hipConj(beta_pw_type[array2D_offset(igk, offset_t, num_gkvec)]), +//== make_hipDoubleComplex(cosp, sinp)); //== //== } //== @@ -181,7 +180,7 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== //== __syncthreads(); //== -//== for (int i = 0; i < bs; i++) val = cuCadd(val, cuCmul(beta_pw_tile[threadIdx.x][i], phi_tile[threadIdx.y][i])); +//== for (int i = 0; i < bs; i++) val = hipCadd(val, hipCmul(beta_pw_tile[threadIdx.x][i], phi_tile[threadIdx.y][i])); //== //== __syncthreads(); //== } @@ -204,17 +203,15 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== dim3 threadsPerBlock(BLOCK_SIZE, BLOCK_SIZE); //== dim3 numBlocks(num_blocks(num_beta, BLOCK_SIZE), num_blocks(num_phi, BLOCK_SIZE)); //== -//== generate_beta_phi_gpu_kernel<<< -//== numBlocks, -//== threadsPerBlock>>>(num_gkvec, +//== hipLaunchKernelGGL((generate_beta_phi_gpu_kernel), dim3(//== numBlocks), dim3(//== threadsPerBlock), 0, 0, num_gkvec, //== num_beta, //== num_phi, //== beta_t_idx, //== atom_pos, //== gkvec, -//== (cuDoubleComplex*)beta_pw_type, -//== (cuDoubleComplex*)phi, -//== (cuDoubleComplex*)beta_phi); +//== (hipDoubleComplex*)beta_pw_type, +//== (hipDoubleComplex*)phi, +//== (hipDoubleComplex*)beta_phi); //== 
} @@ -222,33 +219,33 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //__global__ void copy_beta_psi_gpu_kernel //( -// cuDoubleComplex const* beta_psi, +// hipDoubleComplex const* beta_psi, // int beta_psi_ld, // double const* wo, -// cuDoubleComplex* beta_psi_wo, +// hipDoubleComplex* beta_psi_wo, // int beta_psi_wo_ld //) //{ // int xi = threadIdx.x; // int j = blockIdx.x; // -// beta_psi_wo[array2D_offset(xi, j, beta_psi_wo_ld)] = cuCmul(cuConj(beta_psi[array2D_offset(xi, j, beta_psi_ld)]), -// make_cuDoubleComplex(wo[j], 0.0)); +// beta_psi_wo[array2D_offset(xi, j, beta_psi_wo_ld)] = hipCmul(hipConj(beta_psi[array2D_offset(xi, j, beta_psi_ld)]), +// make_hipDoubleComplex(wo[j], 0.0)); //} //extern "C" void copy_beta_psi_gpu(int nbf, // int nloc, -// cuDoubleComplex const* beta_psi, +// hipDoubleComplex const* beta_psi, // int beta_psi_ld, // double const* wo, -// cuDoubleComplex* beta_psi_wo, +// hipDoubleComplex* beta_psi_wo, // int beta_psi_wo_ld, // int stream_id) //{ // dim3 grid_t(nbf); // dim3 grid_b(nloc); // -// cudaStream_t stream = (stream_id == -1) ? NULL : streams[stream_id]; +// hipStream_t stream = (stream_id == -1) ? 
NULL : streams[stream_id]; // // copy_beta_psi_gpu_kernel <<>> // ( @@ -263,14 +260,14 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, __global__ void compute_inner_product_gpu_kernel ( int num_gkvec_row, - cuDoubleComplex const* f1, - cuDoubleComplex const* f2, + hipDoubleComplex const* f1, + hipDoubleComplex const* f2, double* prod ) { int N = num_blocks(num_gkvec_row, blockDim.x); - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; sdata[threadIdx.x] = 0.0; @@ -298,15 +295,14 @@ __global__ void compute_inner_product_gpu_kernel extern "C" void compute_inner_product_gpu(int num_gkvec_row, int n, - cuDoubleComplex const* f1, - cuDoubleComplex const* f2, + hipDoubleComplex const* f1, + hipDoubleComplex const* f2, double* prod) { dim3 grid_t(64); dim3 grid_b(n); - compute_inner_product_gpu_kernel <<>> - ( + hipLaunchKernelGGL((compute_inner_product_gpu_kernel), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, num_gkvec_row, f1, f2, @@ -317,14 +313,14 @@ extern "C" void compute_inner_product_gpu(int num_gkvec_row, __global__ void add_checksum_gpu_kernel ( - cuDoubleComplex const* wf__, + hipDoubleComplex const* wf__, int num_rows_loc__, - cuDoubleComplex* result__ + hipDoubleComplex* result__ ) { int N = num_blocks(num_rows_loc__, blockDim.x); - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* sdata_x = (double*)&sdata_ptr[0]; double* sdata_y = (double*)&sdata_ptr[blockDim.x * sizeof(double)]; @@ -349,19 +345,18 @@ __global__ void add_checksum_gpu_kernel __syncthreads(); } - result__[blockIdx.x] = cuCadd(result__[blockIdx.x], make_cuDoubleComplex(sdata_x[0], sdata_y[0])); + result__[blockIdx.x] = hipCadd(result__[blockIdx.x], make_hipDoubleComplex(sdata_x[0], sdata_y[0])); } -extern "C" void add_checksum_gpu(cuDoubleComplex* wf__, +extern "C" void add_checksum_gpu(hipDoubleComplex* wf__, int num_rows_loc__, int nwf__, - cuDoubleComplex* 
result__) + hipDoubleComplex* result__) { dim3 grid_t(64); dim3 grid_b(nwf__); - add_checksum_gpu_kernel <<>> - ( + hipLaunchKernelGGL((add_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, wf__, num_rows_loc__, result__ diff --git a/src/Kernels/cuda_uspp_kernels.hip.cpp b/src/Kernels/cuda_uspp_kernels.hip.cpp new file mode 100644 index 000000000..95f3ae530 --- /dev/null +++ b/src/Kernels/cuda_uspp_kernels.hip.cpp @@ -0,0 +1 @@ +#include "cuda_uspp_kernels.cu" diff --git a/src/Kernels/density_rg.cu b/src/Kernels/density_rg.cu index e23158f3c..0cd208551 100644 --- a/src/Kernels/density_rg.cu +++ b/src/Kernels/density_rg.cu @@ -23,23 +23,24 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void update_density_rg_1_gpu_kernel(int size__, - cuDoubleComplex const* psi_rg__, + hipDoubleComplex const* psi_rg__, double wt__, double* density_rg__) { int ir = blockIdx.x * blockDim.x + threadIdx.x; if (ir < size__) { - cuDoubleComplex z = psi_rg__[ir]; + hipDoubleComplex z = psi_rg__[ir]; density_rg__[ir] += (z.x * z.x + z.y * z.y) * wt__; } } extern "C" void update_density_rg_1_gpu(int size__, - cuDoubleComplex const* psi_rg__, + hipDoubleComplex const* psi_rg__, double wt__, double* density_rg__) { @@ -48,8 +49,7 @@ extern "C" void update_density_rg_1_gpu(int size__, dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); - update_density_rg_1_gpu_kernel <<>> - ( + hipLaunchKernelGGL((update_density_rg_1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, psi_rg__, wt__, @@ -58,23 +58,23 @@ extern "C" void update_density_rg_1_gpu(int size__, } __global__ void update_density_rg_2_gpu_kernel(int size__, - cuDoubleComplex const* psi_up_rg__, - cuDoubleComplex const* psi_dn_rg__, + hipDoubleComplex const* psi_up_rg__, + hipDoubleComplex const* psi_dn_rg__, double wt__, double* density_x_rg__, double* density_y_rg__) { int ir = blockIdx.x * blockDim.x + 
threadIdx.x; if (ir < size__) { - cuDoubleComplex z = cuCmul(psi_up_rg__[ir], cuConj(psi_dn_rg__[ir])); + hipDoubleComplex z = hipCmul(psi_up_rg__[ir], hipConj(psi_dn_rg__[ir])); density_x_rg__[ir] += 2 * z.x * wt__; density_y_rg__[ir] -= 2 * z.y * wt__; } } extern "C" void update_density_rg_2_gpu(int size__, - cuDoubleComplex const* psi_up_rg__, - cuDoubleComplex const* psi_dn_rg__, + hipDoubleComplex const* psi_up_rg__, + hipDoubleComplex const* psi_dn_rg__, double wt__, double* density_x_rg__, double* density_y_rg__) @@ -84,8 +84,7 @@ extern "C" void update_density_rg_2_gpu(int size__, dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); - update_density_rg_2_gpu_kernel <<>> - ( + hipLaunchKernelGGL((update_density_rg_2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, psi_up_rg__, psi_dn_rg__, diff --git a/src/Kernels/density_rg.hip.cpp b/src/Kernels/density_rg.hip.cpp new file mode 100644 index 000000000..a00c4483f --- /dev/null +++ b/src/Kernels/density_rg.hip.cpp @@ -0,0 +1 @@ +#include "density_rg.cu" diff --git a/src/Kernels/generate_dm_pw.cu b/src/Kernels/generate_dm_pw.cu index ff006ca9b..fac5ed75b 100644 --- a/src/Kernels/generate_dm_pw.cu +++ b/src/Kernels/generate_dm_pw.cu @@ -24,8 +24,12 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" + +#ifdef __CUDA #include "../SDDK/GPU/cublas.hpp" -#include +#endif __global__ void generate_phase_factors_conj_gpu_kernel ( @@ -33,7 +37,7 @@ __global__ void generate_phase_factors_conj_gpu_kernel int num_atoms__, double const* atom_pos__, int const* gvec__, - cuDoubleComplex* phase_factors__ + hipDoubleComplex* phase_factors__ ) { int ia = blockIdx.y; @@ -49,7 +53,7 @@ __global__ void generate_phase_factors_conj_gpu_kernel int gvz = gvec__[array2D_offset(igloc, 2, num_gvec_loc__)]; double p = twopi * (ax * gvx + ay * gvy + az * gvz); - phase_factors__[array2D_offset(igloc, ia, num_gvec_loc__)] = 
make_cuDoubleComplex(cos(p), -sin(p)); + phase_factors__[array2D_offset(igloc, ia, num_gvec_loc__)] = make_hipDoubleComplex(cos(p), -sin(p)); } } @@ -65,23 +69,23 @@ extern "C" void generate_dm_pw_gpu(int num_atoms__, { //CUDA_timer t("generate_dm_pw_gpu"); - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = acc::stream(stream_id(stream_id__)); dim3 grid_t(32); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - generate_phase_factors_conj_gpu_kernel<<>> - ( + hipLaunchKernelGGL((generate_phase_factors_conj_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, num_gvec_loc__, num_atoms__, atom_pos__, gvec__, - (cuDoubleComplex*)phase_factors__ + (hipDoubleComplex*)phase_factors__ ); double alpha = 1; double beta = 0; +#ifdef __CUDA cublas::dgemm('N', 'T', nbf__ * (nbf__ + 1) / 2, num_gvec_loc__ * 2, num_atoms__, &alpha, dm__, nbf__ * (nbf__ + 1) / 2, @@ -89,6 +93,9 @@ extern "C" void generate_dm_pw_gpu(int num_atoms__, &beta, dm_pw__, nbf__ * (nbf__ + 1) / 2, stream_id__); +#else + throw std::runtime_error("not implemented for non-CUDA."); +#endif acc::sync_stream(stream_id(stream_id__)); } diff --git a/src/Kernels/generate_dm_pw.hip.cpp b/src/Kernels/generate_dm_pw.hip.cpp new file mode 100644 index 000000000..685713609 --- /dev/null +++ b/src/Kernels/generate_dm_pw.hip.cpp @@ -0,0 +1 @@ +#include "generate_dm_pw.cu" diff --git a/src/Kernels/generate_phase_factors.cu b/src/Kernels/generate_phase_factors.cu index 2c5caa696..d6fb737ba 100644 --- a/src/Kernels/generate_phase_factors.cu +++ b/src/Kernels/generate_phase_factors.cu @@ -23,7 +23,8 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void generate_phase_factors_gpu_kernel ( @@ -31,7 +32,7 @@ __global__ void generate_phase_factors_gpu_kernel int num_atoms, double const* atom_pos, int const* gvec, - cuDoubleComplex* phase_factors + hipDoubleComplex* phase_factors ) { int ia = blockIdx.y; @@ 
-51,7 +52,7 @@ __global__ void generate_phase_factors_gpu_kernel double sinp = sin(p); double cosp = cos(p); - phase_factors[array2D_offset(igloc, ia, num_gvec_loc)] = make_cuDoubleComplex(cosp, sinp); + phase_factors[array2D_offset(igloc, ia, num_gvec_loc)] = make_hipDoubleComplex(cosp, sinp); } } @@ -60,14 +61,13 @@ extern "C" void generate_phase_factors_gpu(int num_gvec_loc__, int num_atoms__, int const* gvec__, double const* atom_pos__, - cuDoubleComplex* phase_factors__) + hipDoubleComplex* phase_factors__) { dim3 grid_t(32); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - generate_phase_factors_gpu_kernel<<>> - ( + hipLaunchKernelGGL((generate_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gvec_loc__, num_atoms__, atom_pos__, diff --git a/src/Kernels/generate_phase_factors.hip.cpp b/src/Kernels/generate_phase_factors.hip.cpp new file mode 100644 index 000000000..e3e1cd8da --- /dev/null +++ b/src/Kernels/generate_phase_factors.hip.cpp @@ -0,0 +1 @@ +#include "generate_phase_factors.cu" diff --git a/src/Kernels/mul_by_veff.cu b/src/Kernels/mul_by_veff.cu index 8d59fe60d..9dc0efc77 100644 --- a/src/Kernels/mul_by_veff.cu +++ b/src/Kernels/mul_by_veff.cu @@ -24,77 +24,78 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void mul_by_veff0_gpu_kernel(int size__, double* const* veff__, - cuDoubleComplex* buf__) + hipDoubleComplex* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - cuDoubleComplex z = buf__[i]; + hipDoubleComplex z = buf__[i]; double v0 = veff__[0][i]; - buf__[i] = make_cuDoubleComplex(z.x * v0, z.y * v0); + buf__[i] = make_hipDoubleComplex(z.x * v0, z.y * v0); } } __global__ void mul_by_veff1_gpu_kernel(int size__, double* const* veff__, - cuDoubleComplex* buf__) + hipDoubleComplex* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - cuDoubleComplex z = buf__[i]; 
+ hipDoubleComplex z = buf__[i]; double v1 = veff__[1][i]; - buf__[i] = make_cuDoubleComplex(z.x * v1, z.y * v1); + buf__[i] = make_hipDoubleComplex(z.x * v1, z.y * v1); } } __global__ void mul_by_veff2_gpu_kernel(int size__, double* const* veff__, - cuDoubleComplex* buf__) + hipDoubleComplex* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - cuDoubleComplex z = buf__[i]; - cuDoubleComplex v = make_cuDoubleComplex(veff__[2][i], -veff__[3][i]); - buf__[i] = cuCmul(z, v); + hipDoubleComplex z = buf__[i]; + hipDoubleComplex v = make_hipDoubleComplex(veff__[2][i], -veff__[3][i]); + buf__[i] = hipCmul(z, v); } } __global__ void mul_by_veff3_gpu_kernel(int size__, double* const* veff__, - cuDoubleComplex* buf__) + hipDoubleComplex* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - cuDoubleComplex z = buf__[i]; - cuDoubleComplex v = make_cuDoubleComplex(veff__[2][i], veff__[3][i]); - buf__[i] = cuCmul(z, v); + hipDoubleComplex z = buf__[i]; + hipDoubleComplex v = make_hipDoubleComplex(veff__[2][i], veff__[3][i]); + buf__[i] = hipCmul(z, v); } } -extern "C" void mul_by_veff_gpu(int ispn__, int size__, double* const* veff__, cuDoubleComplex* buf__) +extern "C" void mul_by_veff_gpu(int ispn__, int size__, double* const* veff__, hipDoubleComplex* buf__) { dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); switch (ispn__) { case 0: { - mul_by_veff0_gpu_kernel<<>>(size__, veff__, buf__); + hipLaunchKernelGGL((mul_by_veff0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 1: { - mul_by_veff1_gpu_kernel<<>>(size__, veff__, buf__); + hipLaunchKernelGGL((mul_by_veff1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 2: { - mul_by_veff2_gpu_kernel<<>>(size__, veff__, buf__); + hipLaunchKernelGGL((mul_by_veff2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 3: { - mul_by_veff3_gpu_kernel<<>>(size__, veff__, buf__); + 
hipLaunchKernelGGL((mul_by_veff3_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } } diff --git a/src/Kernels/mul_by_veff.hip.cpp b/src/Kernels/mul_by_veff.hip.cpp new file mode 100644 index 000000000..d5495f74a --- /dev/null +++ b/src/Kernels/mul_by_veff.hip.cpp @@ -0,0 +1 @@ +#include "mul_by_veff.cu" diff --git a/src/Kernels/mul_veff_with_phase_factors.cu b/src/Kernels/mul_veff_with_phase_factors.cu index e6d62fcb4..d210c8b12 100644 --- a/src/Kernels/mul_veff_with_phase_factors.cu +++ b/src/Kernels/mul_veff_with_phase_factors.cu @@ -24,14 +24,15 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void mul_veff_with_phase_factors_gpu_kernel(int num_gvec_loc__, - cuDoubleComplex const* veff__, + hipDoubleComplex const* veff__, int const* gvec__, int num_atoms__, double const* atom_pos__, - cuDoubleComplex* veff_a__) + hipDoubleComplex* veff_a__) { int ia = blockIdx.y; double ax = atom_pos__[array2D_offset(ia, 0, num_atoms__)]; @@ -47,14 +48,14 @@ __global__ void mul_veff_with_phase_factors_gpu_kernel(int num_gvec_loc__, double p = twopi * (ax * gvx + ay * gvy + az * gvz); - //veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = cuConj(cuCmul(veff__[igloc], make_cuDoubleComplex(cos(p), sin(p)))); - veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = cuCmul(veff__[igloc], make_cuDoubleComplex(cos(p), sin(p))); + //veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = hipConj(hipCmul(veff__[igloc], make_hipDoubleComplex(cos(p), sin(p)))); + veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = hipCmul(veff__[igloc], make_hipDoubleComplex(cos(p), sin(p))); } } extern "C" void mul_veff_with_phase_factors_gpu(int num_atoms__, int num_gvec_loc__, - cuDoubleComplex const* veff__, + hipDoubleComplex const* veff__, int const* gvec__, double const* atom_pos__, double* veff_a__, @@ -63,15 +64,14 @@ extern "C" void 
mul_veff_with_phase_factors_gpu(int num_atoms__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = acc::stream(stream_id(stream_id__)); - mul_veff_with_phase_factors_gpu_kernel <<>> - ( + hipLaunchKernelGGL((mul_veff_with_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, num_gvec_loc__, veff__, gvec__, num_atoms__, atom_pos__, - (cuDoubleComplex*)veff_a__ + (hipDoubleComplex*)veff_a__ ); } diff --git a/src/Kernels/mul_veff_with_phase_factors.hip.cpp b/src/Kernels/mul_veff_with_phase_factors.hip.cpp new file mode 100644 index 000000000..970575979 --- /dev/null +++ b/src/Kernels/mul_veff_with_phase_factors.hip.cpp @@ -0,0 +1 @@ +#include "mul_veff_with_phase_factors.cu" diff --git a/src/Kernels/random.cu b/src/Kernels/random.cu index 42e901d84..ea9fe99b1 100644 --- a/src/Kernels/random.cu +++ b/src/Kernels/random.cu @@ -21,6 +21,7 @@ * * \brief CUDA kernel to compute simple random noise on GPU. 
*/ +//#include "hip/hip_runtime.h" //== inline __device__ uint32_t random(size_t seed) //== { diff --git a/src/Kernels/random.hip.cpp b/src/Kernels/random.hip.cpp new file mode 100644 index 000000000..e77f5d698 --- /dev/null +++ b/src/Kernels/random.hip.cpp @@ -0,0 +1 @@ +#include "random.cu" diff --git a/src/Kernels/residuals_aux.cu b/src/Kernels/residuals_aux.cu index 0e6358499..797e7b0b5 100644 --- a/src/Kernels/residuals_aux.cu +++ b/src/Kernels/residuals_aux.cu @@ -23,15 +23,16 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void compute_residuals_gpu_kernel ( int const num_rows_loc__, double const* eval__, - cuDoubleComplex const* hpsi__, - cuDoubleComplex const* opsi__, - cuDoubleComplex* res__ + hipDoubleComplex const* hpsi__, + hipDoubleComplex const* opsi__, + hipDoubleComplex* res__ ) { int j = blockIdx.x * blockDim.x + threadIdx.x; @@ -40,7 +41,7 @@ __global__ void compute_residuals_gpu_kernel if (j < num_rows_loc__) { int k = array2D_offset(j, ibnd, num_rows_loc__); /* res = hpsi_j - e_j * opsi_j */ - res__[k] = cuCsub(hpsi__[k], make_cuDoubleComplex(opsi__[k].x * eval__[ibnd], opsi__[k].y * eval__[ibnd])); + res__[k] = hipCsub(hpsi__[k], make_hipDoubleComplex(opsi__[k].x * eval__[ibnd], opsi__[k].y * eval__[ibnd])); } } @@ -48,7 +49,7 @@ __global__ void compute_residuals_gpu_kernel //== ( //== int num_gkvec_row, //== int* res_idx, -//== cuDoubleComplex const* res, +//== hipDoubleComplex const* res, //== double* res_norm, //== int reduced, //== int mpi_rank @@ -56,7 +57,7 @@ __global__ void compute_residuals_gpu_kernel //== { //== int N = num_blocks(num_gkvec_row, blockDim.x); //== -//== extern __shared__ char sdata_ptr[]; +//== HIP_DYNAMIC_SHARED( char, sdata_ptr) //== double* sdata = (double*)&sdata_ptr[0]; //== //== sdata[threadIdx.x] = 0.0; @@ -100,11 +101,11 @@ __global__ void compute_residuals_gpu_kernel //== int num_res_local__, //== int* res_idx__, //== double* 
eval__, -//== cuDoubleComplex const* hpsi__, -//== cuDoubleComplex const* opsi__, +//== hipDoubleComplex const* hpsi__, +//== hipDoubleComplex const* opsi__, //== double const* h_diag__, //== double const* o_diag__, -//== cuDoubleComplex* res__, +//== hipDoubleComplex* res__, //== double* res_norm__, //== double* p_norm__, //== int gkvec_reduced__, @@ -159,9 +160,9 @@ __global__ void compute_residuals_gpu_kernel //== ); //== } -extern "C" void compute_residuals_gpu(cuDoubleComplex* hpsi__, - cuDoubleComplex* opsi__, - cuDoubleComplex* res__, +extern "C" void compute_residuals_gpu(hipDoubleComplex* hpsi__, + hipDoubleComplex* opsi__, + hipDoubleComplex* res__, int num_rows_loc__, int num_bands__, double* eval__) @@ -169,8 +170,7 @@ extern "C" void compute_residuals_gpu(cuDoubleComplex* hpsi__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_rows_loc__, grid_t.x), num_bands__); - compute_residuals_gpu_kernel <<>> - ( + hipLaunchKernelGGL((compute_residuals_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_rows_loc__, eval__, hpsi__, @@ -182,7 +182,7 @@ extern "C" void compute_residuals_gpu(cuDoubleComplex* hpsi__, __global__ void add_square_sum_gpu_kernel ( int num_rows_loc__, - cuDoubleComplex const* wf__, + hipDoubleComplex const* wf__, int reduced__, int mpi_rank__, double* result__ @@ -190,7 +190,7 @@ __global__ void add_square_sum_gpu_kernel { int N = num_blocks(num_rows_loc__, blockDim.x); - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; sdata[threadIdx.x] = 0.0; @@ -226,7 +226,7 @@ __global__ void add_square_sum_gpu_kernel } } -extern "C" void add_square_sum_gpu(cuDoubleComplex* wf__, +extern "C" void add_square_sum_gpu(hipDoubleComplex* wf__, int num_rows_loc__, int nwf__, int reduced__, @@ -236,8 +236,7 @@ extern "C" void add_square_sum_gpu(cuDoubleComplex* wf__, dim3 grid_t(64); dim3 grid_b(nwf__); - add_square_sum_gpu_kernel <<>> - ( + hipLaunchKernelGGL((add_square_sum_gpu_kernel), 
dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, num_rows_loc__, wf__, reduced__, @@ -250,7 +249,7 @@ __global__ void apply_preconditioner_gpu_kernel(int const num_rows_loc__, double const* eval__, double const* h_diag__, double const* o_diag__, - cuDoubleComplex* res__) + hipDoubleComplex* res__) { int j = blockIdx.x * blockDim.x + threadIdx.x; int ibnd = blockIdx.y; @@ -259,11 +258,11 @@ __global__ void apply_preconditioner_gpu_kernel(int const num_rows_loc__, double p = (h_diag__[j] - eval__[ibnd] * o_diag__[j]); p = 0.5 * (1 + p + sqrt(1.0 + (p - 1) * (p - 1))); int k = array2D_offset(j, ibnd, num_rows_loc__); - res__[k] = make_cuDoubleComplex(res__[k].x / p, res__[k].y / p); + res__[k] = make_hipDoubleComplex(res__[k].x / p, res__[k].y / p); } } -extern "C" void apply_preconditioner_gpu(cuDoubleComplex* res__, +extern "C" void apply_preconditioner_gpu(hipDoubleComplex* res__, int num_rows_loc__, int num_bands__, double* eval__, @@ -273,26 +272,26 @@ extern "C" void apply_preconditioner_gpu(cuDoubleComplex* res__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_rows_loc__, grid_t.x), num_bands__); - apply_preconditioner_gpu_kernel <<>> (num_rows_loc__, eval__, h_diag__, o_diag__, res__); + hipLaunchKernelGGL((apply_preconditioner_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_rows_loc__, eval__, h_diag__, o_diag__, res__); } -__global__ void make_real_g0_gpu_kernel(cuDoubleComplex* res__, +__global__ void make_real_g0_gpu_kernel(hipDoubleComplex* res__, int ld__) { - cuDoubleComplex z = res__[array2D_offset(0, blockIdx.x, ld__)]; + hipDoubleComplex z = res__[array2D_offset(0, blockIdx.x, ld__)]; if (threadIdx.x == 0) { - res__[array2D_offset(0, blockIdx.x, ld__)] = make_cuDoubleComplex(z.x, 0); + res__[array2D_offset(0, blockIdx.x, ld__)] = make_hipDoubleComplex(z.x, 0); } } -extern "C" void make_real_g0_gpu(cuDoubleComplex* res__, +extern "C" void make_real_g0_gpu(hipDoubleComplex* res__, int ld__, int n__) { dim3 grid_t(32); dim3 grid_b(n__); - 
make_real_g0_gpu_kernel <<>> (res__, ld__); + hipLaunchKernelGGL((make_real_g0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, res__, ld__); } diff --git a/src/Kernels/residuals_aux.hip.cpp b/src/Kernels/residuals_aux.hip.cpp new file mode 100644 index 000000000..57c09e6ae --- /dev/null +++ b/src/Kernels/residuals_aux.hip.cpp @@ -0,0 +1 @@ +#include "residuals_aux.cu" diff --git a/src/Kernels/spline.cu b/src/Kernels/spline.cu index cb9d5eec0..4d4173491 100644 --- a/src/Kernels/spline.cu +++ b/src/Kernels/spline.cu @@ -23,7 +23,8 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" __global__ void spline_inner_product_gpu_kernel_v3(int num_points__, int const* idx_ri__, @@ -37,7 +38,7 @@ __global__ void spline_inner_product_gpu_kernel_v3(int num_points__, int idx_f = idx_ri__[array2D_offset(0, blockIdx.x, 2)]; int idx_g = idx_ri__[array2D_offset(1, blockIdx.x, 2)]; - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; int a_offs_f = array3D_offset(0, 0, idx_f, num_points__, 4); @@ -138,8 +139,7 @@ extern "C" void spline_inner_product_gpu_v3(int const* idx_ri__, dim3 grid_t(64); dim3 grid_b(num_ri__); - spline_inner_product_gpu_kernel_v3 <<>> - ( + hipLaunchKernelGGL((spline_inner_product_gpu_kernel_v3), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, num_points__, idx_ri__, x__, diff --git a/src/Kernels/spline.hip.cpp b/src/Kernels/spline.hip.cpp new file mode 100644 index 000000000..3152a6f7e --- /dev/null +++ b/src/Kernels/spline.hip.cpp @@ -0,0 +1 @@ +#include "spline.cu" diff --git a/src/Kernels/sum_q_pw_dm_pw.cu b/src/Kernels/sum_q_pw_dm_pw.cu index 3fc8ffce4..6582da568 100644 --- a/src/Kernels/sum_q_pw_dm_pw.cu +++ b/src/Kernels/sum_q_pw_dm_pw.cu @@ -24,8 +24,12 @@ #include "../SDDK/GPU/cuda_common.hpp" #include "../SDDK/GPU/acc.hpp" +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" + +#ifdef __CUDA #include 
"../SDDK/GPU/cuda_timer.hpp" -#include +#endif __global__ void sum_q_pw_dm_pw_gpu_kernel ( @@ -33,10 +37,10 @@ __global__ void sum_q_pw_dm_pw_gpu_kernel double const* q_pw__, double const* dm_pw__, double const* sym_weight__, - cuDoubleComplex* rho_pw__ + hipDoubleComplex* rho_pw__ ) { - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* rho_re = (double*)&sdata_ptr[0]; double* rho_im = (double*)&sdata_ptr[sizeof(double) * blockDim.x]; @@ -71,7 +75,7 @@ __global__ void sum_q_pw_dm_pw_gpu_kernel __syncthreads(); } if (threadIdx.x == 0) { - rho_pw__[igloc] = cuCadd(rho_pw__[igloc], make_cuDoubleComplex(rho_re[0], rho_im[0])); + rho_pw__[igloc] = hipCadd(rho_pw__[igloc], make_hipDoubleComplex(rho_re[0], rho_im[0])); } } @@ -80,18 +84,19 @@ extern "C" void sum_q_pw_dm_pw_gpu(int num_gvec_loc__, double const* q_pw__, double const* dm_pw__, double const* sym_weight__, - cuDoubleComplex* rho_pw__, + hipDoubleComplex* rho_pw__, int stream_id__) { +#ifdef __CUDA CUDA_timer t("sum_q_pw_dm_pw_gpu"); +#endif - cudaStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_gvec_loc__); - sum_q_pw_dm_pw_gpu_kernel <<>> - ( + hipLaunchKernelGGL((sum_q_pw_dm_pw_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), stream, nbf__, q_pw__, dm_pw__, diff --git a/src/Kernels/sum_q_pw_dm_pw.hip.cpp b/src/Kernels/sum_q_pw_dm_pw.hip.cpp new file mode 100644 index 000000000..292adfb9b --- /dev/null +++ b/src/Kernels/sum_q_pw_dm_pw.hip.cpp @@ -0,0 +1 @@ +#include "sum_q_pw_dm_pw.cu" diff --git a/src/SDDK/GPU/cuda_common.hpp b/src/SDDK/GPU/cuda_common.hpp index c3c461a42..3dcf1f60b 100644 --- a/src/SDDK/GPU/cuda_common.hpp +++ b/src/SDDK/GPU/cuda_common.hpp @@ -25,6 +25,10 @@ #ifndef __CUDA_COMMON_HPP__ #define __CUDA_COMMON_HPP__ +#include +#include "hip/hip_runtime.h" +#include "hip/hip_complex.h" + const double twopi = 6.2831853071795864769; inline 
__device__ size_t array2D_offset(int i0, int i1, int ld0) From c53080dfe1b332779088d786d330477ae439d003 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Tue, 19 Feb 2019 17:40:50 +0100 Subject: [PATCH 03/28] removed .hip.cpp dummy files --- src/Kernels/add_pw_ekin.hip.cpp | 1 - src/Kernels/create_beta_gk.hip.cpp | 1 - src/Kernels/cuda_uspp_kernels.hip.cpp | 1 - src/Kernels/density_rg.hip.cpp | 1 - src/Kernels/generate_dm_pw.hip.cpp | 1 - src/Kernels/generate_phase_factors.hip.cpp | 1 - src/Kernels/mul_by_veff.hip.cpp | 1 - src/Kernels/mul_veff_with_phase_factors.hip.cpp | 1 - src/Kernels/random.hip.cpp | 1 - src/Kernels/residuals_aux.hip.cpp | 1 - src/Kernels/spline.hip.cpp | 1 - src/Kernels/sum_q_pw_dm_pw.hip.cpp | 1 - src/SDDK/GPU/fft_kernels.hip.cpp | 2 -- 13 files changed, 14 deletions(-) delete mode 100644 src/Kernels/add_pw_ekin.hip.cpp delete mode 100644 src/Kernels/create_beta_gk.hip.cpp delete mode 100644 src/Kernels/cuda_uspp_kernels.hip.cpp delete mode 100644 src/Kernels/density_rg.hip.cpp delete mode 100644 src/Kernels/generate_dm_pw.hip.cpp delete mode 100644 src/Kernels/generate_phase_factors.hip.cpp delete mode 100644 src/Kernels/mul_by_veff.hip.cpp delete mode 100644 src/Kernels/mul_veff_with_phase_factors.hip.cpp delete mode 100644 src/Kernels/random.hip.cpp delete mode 100644 src/Kernels/residuals_aux.hip.cpp delete mode 100644 src/Kernels/spline.hip.cpp delete mode 100644 src/Kernels/sum_q_pw_dm_pw.hip.cpp delete mode 100644 src/SDDK/GPU/fft_kernels.hip.cpp diff --git a/src/Kernels/add_pw_ekin.hip.cpp b/src/Kernels/add_pw_ekin.hip.cpp deleted file mode 100644 index a30c1a193..000000000 --- a/src/Kernels/add_pw_ekin.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "add_pw_ekin.cu" diff --git a/src/Kernels/create_beta_gk.hip.cpp b/src/Kernels/create_beta_gk.hip.cpp deleted file mode 100644 index cb49ea3a5..000000000 --- a/src/Kernels/create_beta_gk.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "create_beta_gk.cu" diff --git 
a/src/Kernels/cuda_uspp_kernels.hip.cpp b/src/Kernels/cuda_uspp_kernels.hip.cpp deleted file mode 100644 index 95f3ae530..000000000 --- a/src/Kernels/cuda_uspp_kernels.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "cuda_uspp_kernels.cu" diff --git a/src/Kernels/density_rg.hip.cpp b/src/Kernels/density_rg.hip.cpp deleted file mode 100644 index a00c4483f..000000000 --- a/src/Kernels/density_rg.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "density_rg.cu" diff --git a/src/Kernels/generate_dm_pw.hip.cpp b/src/Kernels/generate_dm_pw.hip.cpp deleted file mode 100644 index 685713609..000000000 --- a/src/Kernels/generate_dm_pw.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "generate_dm_pw.cu" diff --git a/src/Kernels/generate_phase_factors.hip.cpp b/src/Kernels/generate_phase_factors.hip.cpp deleted file mode 100644 index e3e1cd8da..000000000 --- a/src/Kernels/generate_phase_factors.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "generate_phase_factors.cu" diff --git a/src/Kernels/mul_by_veff.hip.cpp b/src/Kernels/mul_by_veff.hip.cpp deleted file mode 100644 index d5495f74a..000000000 --- a/src/Kernels/mul_by_veff.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "mul_by_veff.cu" diff --git a/src/Kernels/mul_veff_with_phase_factors.hip.cpp b/src/Kernels/mul_veff_with_phase_factors.hip.cpp deleted file mode 100644 index 970575979..000000000 --- a/src/Kernels/mul_veff_with_phase_factors.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "mul_veff_with_phase_factors.cu" diff --git a/src/Kernels/random.hip.cpp b/src/Kernels/random.hip.cpp deleted file mode 100644 index e77f5d698..000000000 --- a/src/Kernels/random.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "random.cu" diff --git a/src/Kernels/residuals_aux.hip.cpp b/src/Kernels/residuals_aux.hip.cpp deleted file mode 100644 index 57c09e6ae..000000000 --- a/src/Kernels/residuals_aux.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "residuals_aux.cu" diff --git a/src/Kernels/spline.hip.cpp b/src/Kernels/spline.hip.cpp deleted file mode 
100644 index 3152a6f7e..000000000 --- a/src/Kernels/spline.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "spline.cu" diff --git a/src/Kernels/sum_q_pw_dm_pw.hip.cpp b/src/Kernels/sum_q_pw_dm_pw.hip.cpp deleted file mode 100644 index 292adfb9b..000000000 --- a/src/Kernels/sum_q_pw_dm_pw.hip.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "sum_q_pw_dm_pw.cu" diff --git a/src/SDDK/GPU/fft_kernels.hip.cpp b/src/SDDK/GPU/fft_kernels.hip.cpp deleted file mode 100644 index adb876051..000000000 --- a/src/SDDK/GPU/fft_kernels.hip.cpp +++ /dev/null @@ -1,2 +0,0 @@ -// dummy file for compilation with hipcc, because compiling .cu files for amd gpus does not work -#include "fft_kernels.cu" From 313363183b2758f4cbf6a27d1c38207d96621789 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Tue, 19 Feb 2019 17:42:11 +0100 Subject: [PATCH 04/28] fixed warnings --- src/SDDK/GPU/rocfft_interface.cpp | 6 +++--- src/SDDK/linalg.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/SDDK/GPU/rocfft_interface.cpp b/src/SDDK/GPU/rocfft_interface.cpp index 2599223a8..925b3ff58 100644 --- a/src/SDDK/GPU/rocfft_interface.cpp +++ b/src/SDDK/GPU/rocfft_interface.cpp @@ -91,7 +91,7 @@ void* create_batch_plan(int rank, int* dims, int* embed, int stride, int dist, i // ROCFFT appears to expect dimension to be ordered in reverse (see hipFFT implementation) size_t lengths[3] = {1, 1, 1}; - for (size_t i = 0; i < rank; i++) lengths[i] = dims[rank - 1 - i]; + for (size_t i = 0; i < (size_t)rank; i++) lengths[i] = dims[rank - 1 - i]; if (embed != nullptr) { rocfft_plan_description_create(&desc); @@ -99,9 +99,9 @@ void* create_batch_plan(int rank, int* dims, int* embed, int stride, int dist, i size_t strides[3] = {(size_t)stride, 1, 1}; size_t nembed_lengths[3] = {1, 1, 1}; - for (size_t i = 0; i < rank; i++) nembed_lengths[i] = embed[rank - 1 - i]; + for (size_t i = 0; i < (size_t)rank; i++) nembed_lengths[i] = embed[rank - 1 - i]; - for (size_t i = 1; i < rank; i++) strides[i] = 
nembed_lengths[i - 1] * strides[i - 1]; + for (size_t i = 1; i < (size_t)rank; i++) strides[i] = nembed_lengths[i - 1] * strides[i - 1]; CALL_ROCFFT( rocfft_plan_description_set_data_layout, diff --git a/src/SDDK/linalg.hpp b/src/SDDK/linalg.hpp index 8ca052ba2..5379ca7b5 100644 --- a/src/SDDK/linalg.hpp +++ b/src/SDDK/linalg.hpp @@ -1183,8 +1183,8 @@ inline void linalg::gemv(int trans__, ftn_int m, ftn_in ftn_double_complex* beta, ftn_double_complex* y, ftn_int incy, int stream_id) { - const char trans[] = {'N', 'T', 'C'}; #if defined(__GPU) && defined(__CUDA) + const char trans[] = {'N', 'T', 'C'}; cublas::zgemv(trans[trans__], m, n, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)x, incx, (cuDoubleComplex*)beta, (cuDoubleComplex*)y, incy, stream_id); #else throw std::runtime_error("not compiled with cublas"); @@ -1204,8 +1204,8 @@ inline void linalg::gemm(int transa__, int transb__, ft assert(m > 0); assert(n > 0); assert(k > 0); - const char trans[] = {'N', 'T', 'C'}; #if defined(__GPU) && defined(__CUDA) + const char trans[] = {'N', 'T', 'C'}; cublas::zgemm(trans[transa__], trans[transb__], m, n, k, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)B, ldb, (cuDoubleComplex*)beta, (cuDoubleComplex*)C, ldc, stream_id); #else throw std::runtime_error("not compiled with cublas"); @@ -1225,8 +1225,8 @@ inline void linalg::gemm(int transa__, int transb__, ftn_int m, assert(m > 0); assert(n > 0); assert(k > 0); - const char trans[] = {'N', 'T', 'C'}; #if defined(__GPU) && defined(__CUDA) + const char trans[] = {'N', 'T', 'C'}; cublas::dgemm(trans[transa__], trans[transb__], m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream_id); #else throw std::runtime_error("not compiled with cublas"); From a09b41d6ca0dd213ace67cb0b80833253c4e74d1 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Wed, 20 Feb 2019 11:08:38 +0100 Subject: [PATCH 05/28] fixed type warnings --- src/Kernels/generate_dm_pw.cu | 2 +- 
src/Kernels/mul_veff_with_phase_factors.cu | 2 +- src/Kernels/sum_q_pw_dm_pw.cu | 2 +- src/SDDK/GPU/rocfft_interface.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Kernels/generate_dm_pw.cu b/src/Kernels/generate_dm_pw.cu index fac5ed75b..a070b68d1 100644 --- a/src/Kernels/generate_dm_pw.cu +++ b/src/Kernels/generate_dm_pw.cu @@ -69,7 +69,7 @@ extern "C" void generate_dm_pw_gpu(int num_atoms__, { //CUDA_timer t("generate_dm_pw_gpu"); - hipStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); dim3 grid_t(32); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); diff --git a/src/Kernels/mul_veff_with_phase_factors.cu b/src/Kernels/mul_veff_with_phase_factors.cu index d210c8b12..c306df47c 100644 --- a/src/Kernels/mul_veff_with_phase_factors.cu +++ b/src/Kernels/mul_veff_with_phase_factors.cu @@ -64,7 +64,7 @@ extern "C" void mul_veff_with_phase_factors_gpu(int num_atoms__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - hipStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); hipLaunchKernelGGL((mul_veff_with_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, num_gvec_loc__, diff --git a/src/Kernels/sum_q_pw_dm_pw.cu b/src/Kernels/sum_q_pw_dm_pw.cu index 6582da568..d588b4bcb 100644 --- a/src/Kernels/sum_q_pw_dm_pw.cu +++ b/src/Kernels/sum_q_pw_dm_pw.cu @@ -91,7 +91,7 @@ extern "C" void sum_q_pw_dm_pw_gpu(int num_gvec_loc__, CUDA_timer t("sum_q_pw_dm_pw_gpu"); #endif - hipStream_t stream = acc::stream(stream_id(stream_id__)); + hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_gvec_loc__); diff --git a/src/SDDK/GPU/rocfft_interface.cpp b/src/SDDK/GPU/rocfft_interface.cpp index 925b3ff58..e2face8b6 100644 --- a/src/SDDK/GPU/rocfft_interface.cpp +++ 
b/src/SDDK/GPU/rocfft_interface.cpp @@ -82,7 +82,7 @@ void* create_batch_plan(int rank, int* dims, int* embed, int stride, int dist, i if (auto_alloc) throw std::runtime_error("Auto allocation for rocfft not implemented!"); // check input - for (size_t i = 0; i < rank; i++) { + for (size_t i = 0; i < (size_t)rank; i++) { if (dims[i] > embed[i]) throw std::runtime_error("Illegal dims or embed parameters for ROCFFT plan creation!"); } From 98519b9680d7029de49962892bcffdeb9a7a1207 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Wed, 20 Feb 2019 11:09:43 +0100 Subject: [PATCH 06/28] added AMD ROCM HIP headers for compilation with CUDA --- src/utils/amd_hip/hip/channel_descriptor.h | 39 + src/utils/amd_hip/hip/device_functions.h | 36 + src/utils/amd_hip/hip/driver_types.h | 36 + .../hip/hcc_detail/channel_descriptor.h | 346 + .../hip/hcc_detail/code_object_bundle.hpp | 139 + src/utils/amd_hip/hip/hcc_detail/concepts.hpp | 30 + src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h | 1 + .../hip/hcc_detail/cuda/math_functions.h | 1 + .../amd_hip/hip/hcc_detail/device_functions.h | 1078 ++ .../hip/hcc_detail/device_library_decls.h | 118 + .../amd_hip/hip/hcc_detail/driver_types.h | 314 + .../hip/hcc_detail/functional_grid_launch.hpp | 158 + .../amd_hip/hip/hcc_detail/grid_launch.h | 69 + .../amd_hip/hip/hcc_detail/grid_launch.hpp | 50 + .../hip/hcc_detail/grid_launch_GGL.hpp | 30 + src/utils/amd_hip/hip/hcc_detail/helpers.hpp | 110 + src/utils/amd_hip/hip/hcc_detail/hip_atomic.h | 269 + .../amd_hip/hip/hcc_detail/hip_complex.h | 356 + src/utils/amd_hip/hip/hcc_detail/hip_db.h | 21 + src/utils/amd_hip/hip/hcc_detail/hip_fp16.h | 1645 +++ .../amd_hip/hip/hcc_detail/hip_fp16_gcc.h | 257 + .../hip/hcc_detail/hip_fp16_math_fwd.h | 82 + src/utils/amd_hip/hip/hcc_detail/hip_ldg.h | 103 + src/utils/amd_hip/hip/hcc_detail/hip_memory.h | 114 + .../amd_hip/hip/hcc_detail/hip_prof_api.h | 204 + .../amd_hip/hip/hcc_detail/hip_prof_str.h | 2512 ++++ .../amd_hip/hip/hcc_detail/hip_runtime.h | 
481 + .../amd_hip/hip/hcc_detail/hip_runtime_api.h | 2860 ++++ .../hip/hcc_detail/hip_surface_types.h | 54 + .../hip/hcc_detail/hip_texture_types.h | 77 + .../amd_hip/hip/hcc_detail/hip_vector_types.h | 880 ++ .../amd_hip/hip/hcc_detail/host_defines.h | 95 + .../amd_hip/hip/hcc_detail/llvm_intrinsics.h | 70 + .../hcc_detail/macro_based_grid_launch.hpp | 798 ++ .../amd_hip/hip/hcc_detail/math_functions.h | 1501 +++ src/utils/amd_hip/hip/hcc_detail/math_fwd.h | 706 + .../amd_hip/hip/hcc_detail/program_state.hpp | 108 + .../hip/hcc_detail/surface_functions.h | 59 + .../hip/hcc_detail/texture_functions.h | 11102 ++++++++++++++++ .../amd_hip/hip/hcc_detail/texture_types.h | 107 + src/utils/amd_hip/hip/hip_common.h | 79 + src/utils/amd_hip/hip/hip_complex.h | 36 + src/utils/amd_hip/hip/hip_fp16.h | 36 + src/utils/amd_hip/hip/hip_hcc.h | 105 + src/utils/amd_hip/hip/hip_profile.h | 42 + src/utils/amd_hip/hip/hip_runtime.h | 67 + src/utils/amd_hip/hip/hip_runtime_api.h | 342 + src/utils/amd_hip/hip/hip_texture_types.h | 36 + src/utils/amd_hip/hip/hip_vector_types.h | 41 + src/utils/amd_hip/hip/math_functions.h | 40 + .../hip/nvcc_detail/channel_descriptor.h | 28 + .../amd_hip/hip/nvcc_detail/hip_complex.h | 119 + .../amd_hip/hip/nvcc_detail/hip_runtime.h | 126 + .../amd_hip/hip/nvcc_detail/hip_runtime_api.h | 1286 ++ .../hip/nvcc_detail/hip_texture_types.h | 6 + src/utils/amd_hip/hip/texture_types.h | 36 + 56 files changed, 29441 insertions(+) create mode 100644 src/utils/amd_hip/hip/channel_descriptor.h create mode 100644 src/utils/amd_hip/hip/device_functions.h create mode 100644 src/utils/amd_hip/hip/driver_types.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/concepts.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h create mode 
100644 src/utils/amd_hip/hip/hcc_detail/device_functions.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/device_library_decls.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/driver_types.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/helpers.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_atomic.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_complex.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_db.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_ldg.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_memory.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_runtime.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/host_defines.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp create mode 100644 src/utils/amd_hip/hip/hcc_detail/math_functions.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/math_fwd.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/program_state.hpp create mode 100644 
src/utils/amd_hip/hip/hcc_detail/surface_functions.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/texture_functions.h create mode 100644 src/utils/amd_hip/hip/hcc_detail/texture_types.h create mode 100644 src/utils/amd_hip/hip/hip_common.h create mode 100644 src/utils/amd_hip/hip/hip_complex.h create mode 100644 src/utils/amd_hip/hip/hip_fp16.h create mode 100644 src/utils/amd_hip/hip/hip_hcc.h create mode 100644 src/utils/amd_hip/hip/hip_profile.h create mode 100644 src/utils/amd_hip/hip/hip_runtime.h create mode 100644 src/utils/amd_hip/hip/hip_runtime_api.h create mode 100644 src/utils/amd_hip/hip/hip_texture_types.h create mode 100644 src/utils/amd_hip/hip/hip_vector_types.h create mode 100644 src/utils/amd_hip/hip/math_functions.h create mode 100644 src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h create mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_complex.h create mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h create mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h create mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h create mode 100644 src/utils/amd_hip/hip/texture_types.h diff --git a/src/utils/amd_hip/hip/channel_descriptor.h b/src/utils/amd_hip/hip/channel_descriptor.h new file mode 100644 index 000000000..842701bad --- /dev/null +++ b/src/utils/amd_hip/hip/channel_descriptor.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/device_functions.h b/src/utils/amd_hip/hip/device_functions.h new file mode 100644 index 000000000..f6059f202 --- /dev/null +++ b/src/utils/amd_hip/hip/device_functions.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/driver_types.h b/src/utils/amd_hip/hip/driver_types.h new file mode 100644 index 000000000..d428ec7f2 --- /dev/null +++ b/src/utils/amd_hip/hip/driver_types.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_DRIVER_TYPES_H + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include "driver_types.h" +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h b/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h new file mode 100644 index 000000000..de290fafc --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h @@ -0,0 +1,346 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H + +#include +#include + +#ifdef __cplusplus + +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); + +static inline hipChannelFormatDesc hipCreateChannelDescHalf() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf1() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf2() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template +static inline hipChannelFormatDesc hipCreateChannelDesc() { + return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return 
hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ // vector3 is the same as vector4 +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline 
hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, 0, 
0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; 
+ return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +#else + +struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + enum hipChannelFormatKind f); + +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp b/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp new file mode 100644 index 000000000..7b97503c1 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp @@ -0,0 +1,139 @@ +/* 
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace hip_impl { +hsa_isa_t triple_to_hsa_isa(const std::string& triple); + +struct Bundled_code { + union Header { + struct { + std::uint64_t offset; + std::uint64_t bundle_sz; + std::uint64_t triple_sz; + }; + char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)]; + } header; + std::string triple; + std::vector blob; +}; + +class Bundled_code_header { + // DATA - STATICS + static constexpr const char magic_string_[] = "__CLANG_OFFLOAD_BUNDLE__"; + static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1; + + // DATA + union Header_ { + struct { + char bundler_magic_string_[magic_string_sz_]; + std::uint64_t bundle_cnt_; + }; + char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)]; + } header_; + std::vector bundles_; + + // FRIENDS - MANIPULATORS + template + friend inline bool read(RandomAccessIterator f, RandomAccessIterator l, + Bundled_code_header& x) { + if (f == l) return false; + + std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_); + + if (valid(x)) { + x.bundles_.resize(x.header_.bundle_cnt_); + + auto it = f + sizeof(x.header_.cbuf_); + for (auto&& y : x.bundles_) { + std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf); + it += sizeof(y.header.cbuf); + + y.triple.assign(it, it + y.header.triple_sz); + + std::copy_n(f + y.header.offset, y.header.bundle_sz, std::back_inserter(y.blob)); + + it += y.header.triple_sz; + + x.bundled_code_size = std::max(x.bundled_code_size, + y.header.offset + y.header.bundle_sz); + } + + return true; + } + + return false; + } + friend inline bool read(const std::vector& blob, Bundled_code_header& x) { + return read(blob.cbegin(), blob.cend(), x); + } + friend inline bool read(std::istream& is, Bundled_code_header& x) { + return read( + std::vector{std::istreambuf_iterator{is}, std::istreambuf_iterator{}}, + x); + } + + // FRIENDS - ACCESSORS + friend inline bool valid(const 
Bundled_code_header& x) { + return std::equal(magic_string_, magic_string_ + magic_string_sz_, + x.header_.bundler_magic_string_); + } + friend inline const std::vector& bundles(const Bundled_code_header& x) { + return x.bundles_; + } + + public: + // CREATORS + Bundled_code_header() = default; + template + Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l); + explicit Bundled_code_header(const std::vector& blob); + explicit Bundled_code_header(const void* maybe_blob); + Bundled_code_header(const Bundled_code_header&) = default; + Bundled_code_header(Bundled_code_header&&) = default; + ~Bundled_code_header() = default; + + // MANIPULATORS + Bundled_code_header& operator=(const Bundled_code_header&) = default; + Bundled_code_header& operator=(Bundled_code_header&&) = default; + + size_t bundled_code_size = 0; +}; + +// CREATORS +template +Bundled_code_header::Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l) + : Bundled_code_header{} { + read(f, l, *this); +} +} // Namespace hip_impl. diff --git a/src/utils/amd_hip/hip/hcc_detail/concepts.hpp b/src/utils/amd_hip/hip/hcc_detail/concepts.hpp new file mode 100644 index 000000000..373cefb29 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/concepts.hpp @@ -0,0 +1,30 @@ +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +namespace hip_impl // Documentation only. +{ +#define requires(...) + +#define FunctionalProcedure typename +} // namespace hip_impl diff --git a/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h b/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h @@ -0,0 +1 @@ + diff --git a/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h b/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h @@ -0,0 +1 @@ + diff --git a/src/utils/amd_hip/hip/hcc_detail/device_functions.h b/src/utils/amd_hip/hip/hcc_detail/device_functions.h new file mode 100644 index 000000000..602c6be87 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/device_functions.h @@ -0,0 +1,1078 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H + +#include "host_defines.h" +#include "math_fwd.h" + +#include +#include + + +#include +#include +#include +/* +Integer Intrinsics +*/ + +// integer intrinsic function __poc __clz __ffs __brev +__device__ static inline unsigned int __popc(unsigned int input) { + return __builtin_popcount(input); +} +__device__ static inline unsigned int __popcll(unsigned long long int input) { + return __builtin_popcountl(input); +} + +__device__ static inline int __clz(int input) { + return __ockl_clz_u32((uint)input); +} + +__device__ static inline int __clzll(long long int input) { + return __ockl_clz_u64((ulong)input); +} + +__device__ static inline unsigned int __ffs(unsigned int input) { + return ( input == 0 ? 
-1 : __builtin_ctz(input) ) + 1; +} + +__device__ static inline unsigned int __ffsll(unsigned long long int input) { + return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; +} + +__device__ static inline unsigned int __ffs(int input) { + return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; +} + +__device__ static inline unsigned int __ffsll(long long int input) { + return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; +} + +__device__ static inline unsigned int __brev(unsigned int input) { + return __llvm_bitrev_b32(input); +} + +__device__ static inline unsigned long long int __brevll(unsigned long long int input) { + return __llvm_bitrev_b64(input); +} + +__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { + return input == 0 ? -1 : __builtin_ctzl(input); +} + +__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { + uint32_t offset = src1 & 31; + uint32_t width = src2 & 31; + return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width); +} + +__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { + uint64_t offset = src1 & 63; + uint64_t width = src2 & 63; + return width == 0 ? 
0 : (src0 << (64 - offset - width)) >> (64 - width); +} + +__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { + uint32_t offset = src2 & 31; + uint32_t width = src3 & 31; + uint32_t mask = (1u << width) - 1; /* unsigned literal: no signed overflow when width == 31 */ + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + +__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) { + uint64_t offset = src2 & 63; + uint64_t width = src3 & 63; + uint64_t mask = (1ULL << width) - 1; /* 64-bit literal: width may be up to 63, an int shift would be undefined */ + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + +__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); +__device__ static unsigned int __hadd(int x, int y); +__device__ static int __mul24(int x, int y); +__device__ static long long int __mul64hi(long long int x, long long int y); +__device__ static int __mulhi(int x, int y); +__device__ static int __rhadd(int x, int y); +__device__ static unsigned int __sad(int x, int y, int z); +__device__ static unsigned int __uhadd(unsigned int x, unsigned int y); +__device__ static int __umul24(unsigned int x, unsigned int y); +__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); +__device__ static unsigned int __umulhi(unsigned int x, unsigned int y); +__device__ static unsigned int __urhadd(unsigned int x, unsigned int y); +__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); + +struct ucharHolder { + union { + unsigned char c[4]; + unsigned int ui; + }; +} __attribute__((aligned(4))); + +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +} __attribute__((aligned(8))); + +__device__ +static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { + struct uchar2Holder cHoldVal; + struct ucharHolder cHoldKey; + struct ucharHolder cHoldOut; + cHoldKey.ui = s; + 
cHoldVal.ui[0] = x; + cHoldVal.ui[1] = y; + cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]]; + cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]]; + cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]]; + cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]]; + return cHoldOut.ui; +} + +__device__ static inline unsigned int __hadd(int x, int y) { + /* Signed average (x + y) >> 1, computed without overflowing the intermediate sum. */ + int common = x & y; /* bits set in both operands contribute fully */ + int half_diff = (x ^ y) >> 1; /* bits set in only one operand contribute half */ + return common + half_diff; +} + +__device__ static inline int __mul24(int x, int y) { + return __ockl_mul24_i32(x, y); +} + +__device__ static inline long long __mul64hi(long long int x, long long int y) { + ulong x0 = (ulong)x & 0xffffffffUL; + long x1 = x >> 32; + ulong y0 = (ulong)y & 0xffffffffUL; + long y1 = y >> 32; + ulong z0 = x0*y0; + long t = x1*y0 + (z0 >> 32); + long z1 = t & 0xffffffffL; + long z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + +__device__ static inline int __mulhi(int x, int y) { + return __ockl_mul_hi_i32(x, y); +} + +__device__ static inline int __rhadd(int x, int y) { + /* Signed rounded average (x + y + 1) >> 1, computed without overflowing the intermediate sum. */ + int either = x | y; /* upper bound of the per-bit contribution */ + int half_diff = (x ^ y) >> 1; /* half of the bits set in only one operand */ + return either - half_diff; +} +__device__ static inline unsigned int __sad(int x, int y, int z) { + return x > y ? 
x - y + z : y - x + z; +} +__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { + return (x & y) + ((x ^ y) >> 1); /* (x + y) >> 1 without wrapping on the intermediate sum */ +} +__device__ static inline int __umul24(unsigned int x, unsigned int y) { + return __ockl_mul24_u32(x, y); +} + +__device__ +static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { + ulong x0 = x & 0xffffffffUL; + ulong x1 = x >> 32; + ulong y0 = y & 0xffffffffUL; + ulong y1 = y >> 32; + ulong z0 = x0*y0; + ulong t = x1*y0 + (z0 >> 32); + ulong z1 = t & 0xffffffffUL; + ulong z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + +__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { + return __ockl_mul_hi_u32(x, y); +} +__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { + return (x | y) - ((x ^ y) >> 1); /* (x + y + 1) >> 1 without wrapping on the intermediate sum */ +} +__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { + return __ockl_sad_u32(x, y, z); +} + +__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } + +/* +HIP specific device functions +*/ + +// utility union type +union __u { + int i; + unsigned int u; + float f; +}; + +__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_bpermutef(int index, float src) { + __u tmp; tmp.f = src; + tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + return tmp.f; +} + +__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_permutef(int index, float src) { + __u tmp; tmp.f = src; /* store the float bits unchanged, matching __hip_ds_bpermutef */ + tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + return tmp.f; /* reinterpret the permuted bits as float, no numeric conversion */ +} + +__device__ static inline unsigned __hip_ds_swizzle(unsigned int src, int pattern) { + __u tmp; 
tmp.u = src; + tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.u; +} +__device__ static inline float __hip_ds_swizzlef(float src, int pattern) { + __u tmp; tmp.f = src; + tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.f; +} + +__device__ static inline int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, + int bank_mask, bool bound_ctrl) { + return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); +} + +static constexpr int warpSize = 64; + + __device__ +inline +int __shfl(int var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.u; +} +__device__ +inline +float __shfl(float var, int src_lane, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.f; +} +__device__ +inline +double __shfl(double var, int src_lane, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + + __device__ +inline +int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + int index = self - lane_delta; + index = (index < (self & ~(width-1)))?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + 
return tmp.u; +} +__device__ +inline +float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + int index = self + lane_delta; + index = (int)((self&(width-1))+lane_delta) >= width?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ +inline +float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +int __shfl_xor(int var, int lane_mask, int width = 
warpSize) { + int self = __lane_id(); + int index = self^lane_mask; + index = index >= ((self+width)&~(width-1))?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.u; +} +__device__ +inline +float __shfl_xor(float var, int lane_mask, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.f; +} +__device__ +inline +double __shfl_xor(double var, int lane_mask, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +#define MASK1 0x00ff00ff +#define MASK2 0xff00ff00 + +__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 + one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 + one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 - one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 - one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 * one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 * one2) & MASK2); + return out; +} + 
+/* + * Rounding modes are not yet supported in HIP + * TODO: Conversion functions are not correct, need to fix when BE is ready +*/ + +__device__ static inline float __double2float_rd(double x) { return (double)x; } +__device__ static inline float __double2float_rn(double x) { return (double)x; } +__device__ static inline float __double2float_ru(double x) { return (double)x; } +__device__ static inline float __double2float_rz(double x) { return (double)x; } + +__device__ static inline int __double2hiint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); + + return tmp[1]; +} +__device__ static inline int __double2loint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); + + return tmp[0]; +} + +__device__ static inline int __double2int_rd(double x) { return (int)x; } +__device__ static inline int __double2int_rn(double x) { return (int)x; } +__device__ static inline int __double2int_ru(double x) { return (int)x; } +__device__ static inline int __double2int_rz(double x) { return (int)x; } + +__device__ static inline long long int __double2ll_rd(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_rn(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_ru(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_rz(double x) { return (long long int)x; } + +__device__ static inline unsigned int __double2uint_rd(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_rn(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_ru(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; } + +__device__ static inline unsigned long long int __double2ull_rd(double x) { + return 
(unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_rn(double x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_ru(double x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_rz(double x) { + return (unsigned long long int)x; +} + +__device__ static inline long long int __double_as_longlong(double x) { + static_assert(sizeof(long long) == sizeof(double), ""); + + long long tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +/* +__device__ unsigned short __float2half_rn(float x); +__device__ float __half2float(unsigned short); + +The above device function are not a valid . +Use +__device__ __half __float2half_rn(float x); +__device__ float __half2float(__half); +from hip_fp16.h + +CUDA implements half as unsigned short whereas, HIP doesn't. + +*/ + +__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); } +__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); } +__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); } +__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); } + +__device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_rn(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_ru(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; } + +__device__ static inline unsigned int __float2uint_rd(float x) { return (unsigned int)x; } +__device__ static inline unsigned int __float2uint_rn(float x) { return (unsigned int)x; } +__device__ static inline unsigned int __float2uint_ru(float x) { return (unsigned int)x; } +__device__ static inline unsigned int __float2uint_rz(float x) 
{ return (unsigned int)x; } + +__device__ static inline unsigned long long int __float2ull_rd(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_rn(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_ru(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_rz(float x) { + return (unsigned long long int)x; +} + +__device__ static inline int __float_as_int(float x) { + static_assert(sizeof(int) == sizeof(float), ""); + + int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline unsigned int __float_as_uint(float x) { + static_assert(sizeof(unsigned int) == sizeof(float), ""); + + unsigned int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __hiloint2double(int hi, int lo) { + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + uint64_t tmp0 = (static_cast(hi) << 32ull) | static_cast(lo); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + + return tmp1; +} + +__device__ static inline double __int2double_rn(int x) { return (double)x; } + +__device__ static inline float __int2float_rd(int x) { return (float)x; } +__device__ static inline float __int2float_rn(int x) { return (float)x; } +__device__ static inline float __int2float_ru(int x) { return (float)x; } +__device__ static inline float __int2float_rz(int x) { return (float)x; } + +__device__ static inline float __int_as_float(int x) { + static_assert(sizeof(float) == sizeof(int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ll2double_rd(long long int x) { return (double)x; } +__device__ static inline double __ll2double_rn(long long int x) { return (double)x; } +__device__ static inline double __ll2double_ru(long long int x) { return (double)x; } 
+__device__ static inline double __ll2double_rz(long long int x) { return (double)x; } + +__device__ static inline float __ll2float_rd(long long int x) { return (float)x; } +__device__ static inline float __ll2float_rn(long long int x) { return (float)x; } +__device__ static inline float __ll2float_ru(long long int x) { return (float)x; } +__device__ static inline float __ll2float_rz(long long int x) { return (float)x; } + +__device__ static inline double __longlong_as_double(long long int x) { + static_assert(sizeof(double) == sizeof(long long), ""); + + double tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __uint2double_rn(unsigned int x) { return (double)x; } /* unsigned parameter, like __uint2float_*: values above INT_MAX must stay positive */ + +__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; } + +__device__ static inline float __uint_as_float(unsigned int x) { + static_assert(sizeof(float) == sizeof(unsigned int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; } + +__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_rz(unsigned long 
long int x) { return (float)x; }
+
+#if defined(__HCC__)
+#define __HCC_OR_HIP_CLANG__ 1
+#elif defined(__clang__) && defined(__HIP__)
+#define __HCC_OR_HIP_CLANG__ 1
+#else
+#define __HCC_OR_HIP_CLANG__ 0
+#endif
+
+#if __HCC_OR_HIP_CLANG__  // always defined above (0 or 1): test the value, not definedness
+
+// Clock functions (forward declarations; bodies exist only for device compilation)
+__device__ long long int __clock64();
+__device__ long long int __clock();
+__device__ long long int clock64();
+__device__ long long int clock();
+// hip.amdgcn.bc - named sync
+__device__ void __named_sync(int a, int b);
+
+#ifdef __HIP_DEVICE_COMPILE__
+
+// Clock functions
+#if __HCC__
+extern "C" uint64_t __clock_u64() __HC__;
+#endif
+
+__device__
+inline __attribute__((always_inline))
+long long int __clock64() {
+// ToDo: Unify HCC and HIP implementation.
+#if __HCC__
+    return (long long int) __clock_u64();
+#else
+    return (long long int) __builtin_amdgcn_s_memrealtime();
+#endif
+}
+
+__device__
+inline __attribute__((always_inline))
+long long int __clock() { return __clock64(); }
+
+__device__
+inline __attribute__((always_inline))
+long long int clock64() { return __clock64(); }
+
+__device__
+inline __attribute__((always_inline))
+long long int clock() { return __clock(); }
+
+// hip.amdgcn.bc - named sync (both arguments are ignored; a plain barrier is issued)
+__device__
+inline
+void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); }
+
+#endif // __HIP_DEVICE_COMPILE__
+
+// warp vote functions __all __any __ballot (thin wrappers over device-library calls)
+__device__
+inline
+int __all(int predicate) {
+    return __ockl_wfall_i32(predicate);
+}
+
+__device__
+inline
+int __any(int predicate) {
+    return __ockl_wfany_i32(predicate);
+}
+
+// XXX from llvm/include/llvm/IR/InstrTypes.h
+#define ICMP_NE 33
+
+__device__
+inline
+unsigned long long int __ballot(int predicate) {
+    return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE);
+}
+
+__device__
+inline
+unsigned long long int __ballot64(int predicate) {
+    return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE);
+}
+
+// hip.amdgcn.bc - lanemask: ballot bits strictly above (gt) / below (lt) the calling lane
+__device__
+inline
+int64_t __lanemask_gt()
+{
+    int32_t activelane = __ockl_activelane_u32();
+    int64_t ballot = __ballot64(1);
+    if (activelane != 63) {  // shifting by 64 would be undefined, so lane 63 returns 0
+        int64_t tmp = (~0UL) << (activelane + 1);
+        return tmp & ballot;
+    }
+    return 0;
+}
+
+__device__
+inline
+int64_t __lanemask_lt()
+{
+    int32_t activelane = __ockl_activelane_u32();
+    int64_t ballot = __ballot64(1);
+    // Keep only the bits below this lane (mirrors __lanemask_gt); lane 0 yields 0.
+    int64_t tmp = ~((~0UL) << activelane);
+    return tmp & ballot;
+}
+
+__device__ inline void* __local_to_generic(void* p) { return p; }
+
+#ifdef __HIP_DEVICE_COMPILE__
+__device__
+inline
+void* __get_dynamicgroupbaseptr()
+{
+    // Get group segment base pointer.
+    return (char*)__local_to_generic((void*)__to_local(__llvm_amdgcn_groupstaticsize()));
+}
+#else
+__device__
+void* __get_dynamicgroupbaseptr();
+#endif // __HIP_DEVICE_COMPILE__
+
+__device__
+inline
+void *__amdgcn_get_dynamicgroupbaseptr() {
+    return __get_dynamicgroupbaseptr();
+}
+
+#if defined(__HCC__) && (__hcc_minor__ < 3)
+// hip.amdgcn.bc - sync threads
+#define __CLK_LOCAL_MEM_FENCE 0x01
+typedef unsigned __cl_mem_fence_flags;
+
+typedef enum __memory_scope {
+    __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+    __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+    __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+    __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+    __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+} __memory_scope;
+
+// enum values aligned with what clang uses in EmitAtomicExpr()
+typedef enum __memory_order
+{
+    __memory_order_relaxed = __ATOMIC_RELAXED,
+    __memory_order_acquire = __ATOMIC_ACQUIRE,
+    __memory_order_release = __ATOMIC_RELEASE,
+    __memory_order_acq_rel = __ATOMIC_ACQ_REL,
+    __memory_order_seq_cst = __ATOMIC_SEQ_CST
+} __memory_order;
+
+__device__
+inline
+static void
+__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
+{
+    // We're tying global-happens-before and local-happens-before together as does HSA
+    if (order != __memory_order_relaxed) {
+        switch (scope) {
+        case __memory_scope_work_item:
+            break;
+        case __memory_scope_sub_group:
+            switch (order) {
+            case __memory_order_relaxed: break;
+            case __memory_order_acquire: __llvm_fence_acq_sg(); break;
+            case __memory_order_release: __llvm_fence_rel_sg(); break;
+            case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
+            case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
+            }
+            break;
+        case __memory_scope_work_group:
+            switch (order) {
+            case __memory_order_relaxed: break;
+            case __memory_order_acquire: __llvm_fence_acq_wg(); break;
+            case __memory_order_release: __llvm_fence_rel_wg(); break;
+            case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
+            case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
+            }
+            break;
+        case __memory_scope_device:
+            switch (order) {
+            case __memory_order_relaxed: break;
+            case __memory_order_acquire: __llvm_fence_acq_dev(); break;
+            case __memory_order_release: __llvm_fence_rel_dev(); break;
+            case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
+            case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
+            }
+            break;
+        case __memory_scope_all_svm_devices:
+            switch (order) {
+            case __memory_order_relaxed: break;
+            case __memory_order_acquire: __llvm_fence_acq_sys(); break;
+            case __memory_order_release: __llvm_fence_rel_sys(); break;
+            case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
+            case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
+            }
+            break;
+        }
+    }
+}
+#endif
+
+// Memory Fence Functions: sequentially-consistent fences at device / work-group / system scope
+__device__
+inline
+static void __threadfence()
+{
+    __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_device);
+}
+
+__device__
+inline
+static void __threadfence_block()
+{
+    __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_work_group);
+}
+
+__device__
+inline
+static void __threadfence_system()
+{
+    __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices);
+}
+
+// abort: device-side abort() is implemented as a trap
+__device__
+inline
+__attribute__((weak))
+void abort() {
+    return __builtin_trap();
+}
+
+
+#endif // __HCC_OR_HIP_CLANG__
+
+#ifdef __HCC__
+
+/**
+ * extern __shared__
+ */
+
+// Macro to replace extern __shared__ declarations
+// to local variable definitions
+#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr();
+
+#define HIP_DYNAMIC_SHARED_ATTRIBUTE
+
+
+#elif defined(__clang__) && defined(__HIP__)
+
+#pragma push_macro("__DEVICE__")
+#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \
+    __attribute__((weak))
+
+__DEVICE__
+inline
+void __assert_fail(const char * __assertion,
+                   const char *__file,
+                   unsigned int __line,
+                   const char *__function)
+{
+    // Ignore all the args for now.
+    __builtin_trap();
+}
+
+__DEVICE__
+inline
+void __assertfail(const char * __assertion,
+                  const char *__file,
+                  unsigned int __line,
+                  const char *__function,
+                  size_t charsize)
+{
+    // ignore all the args for now.
+    __builtin_trap();
+}
+
+__device__
+inline
+static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope)
+{
+    if (flags) {  // fence (release) -> barrier -> fence (acquire) when any fence flag is set
+        __atomic_work_item_fence(flags, __memory_order_release, scope);
+        __builtin_amdgcn_s_barrier();
+        __atomic_work_item_fence(flags, __memory_order_acquire, scope);
+    } else {
+        __builtin_amdgcn_s_barrier();
+    }
+}
+
+__device__
+inline
+static void __barrier(int n)
+{
+    __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group);
+}
+
+__device__
+inline
+__attribute__((noduplicate))
+void __syncthreads()
+{
+    __barrier(__CLK_LOCAL_MEM_FENCE);
+}
+
+// hip.amdgcn.bc - device routine
+/*
+   HW_ID Register bit structure
+   WAVE_ID     3:0     Wave buffer slot number. 0-9.
+   SIMD_ID     5:4     SIMD which the wave is assigned to within the CU.
+   PIPE_ID     7:6     Pipeline from which the wave was dispatched.
+   CU_ID       11:8    Compute Unit the wave is assigned to.
+   SH_ID       12      Shader Array (within an SE) the wave is assigned to.
+   SE_ID       14:13   Shader Engine the wave is assigned to.
+   TG_ID       19:16   Thread-group ID
+   VM_ID       23:20   Virtual Memory ID
+   QUEUE_ID    26:24   Queue from which this wave was dispatched.
+   STATE_ID    29:27   State ID (graphics only, not compute).
+   ME_ID       31:30   Micro-engine ID.
+ */
+
+#define HW_ID 4
+
+#define HW_ID_CU_ID_SIZE 4
+#define HW_ID_CU_ID_OFFSET 8
+
+#define HW_ID_SE_ID_SIZE 2
+#define HW_ID_SE_ID_OFFSET 13
+
+/*
+   Encoding of parameter bitmask
+   HW_ID       5:0     HW_ID
+   OFFSET      10:6    Range: 0..31
+   SIZE        15:11   Range: 1..32 (the 5-bit field stores size - 1)
+ */
+
+#define GETREG_IMMED(SZ,OFF,REG) ((((SZ) - 1) << 11) | ((OFF) << 6) | (REG))
+
+__device__
+inline
+unsigned __smid(void)
+{
+    unsigned cu_id = __builtin_amdgcn_s_getreg(
+            GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID));
+    unsigned se_id = __builtin_amdgcn_s_getreg(
+            GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID));
+
+    /* Each shader engine has 16 CU */
+    return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
+}
+
+#pragma pop_macro("__DEVICE__")  // restore the __DEVICE__ definition saved by push_macro above
+
+// Macro to replace extern __shared__ declarations
+// to local variable definitions
+#define HIP_DYNAMIC_SHARED(type, var) \
+    type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
+
+#define HIP_DYNAMIC_SHARED_ATTRIBUTE
+
+
+#endif //defined(__clang__) && defined(__HIP__)
+
+
+// loop unrolling: byte-wise copy, four bytes per iteration, then the 0-3 byte tail
+static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) {
+    auto dstPtr = static_cast<unsigned char*>(dst);
+    auto srcPtr = static_cast<const unsigned char*>(src);
+
+    while (size >= 4u) {
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+
+        size -= 4u;
+        srcPtr += 4u;
+        dstPtr += 4u;
+    }
+    switch (size) {  // cases fall through on purpose to copy the remaining bytes
+        case 3:
+            dstPtr[2] = srcPtr[2];  // fall through
+        case 2:
+            dstPtr[1] = srcPtr[1];  // fall through
+        case 1:
+            dstPtr[0] = srcPtr[0];
+    }
+
+    return dst;
+}
+
+static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) {
+    auto dstPtr = static_cast<unsigned char*>(dst);
+
+    while (size >= 4u) {
+        dstPtr[0] = val;
+        dstPtr[1] = val;
+        dstPtr[2] = val;
+        dstPtr[3] = val;
+
+        size -= 4u;
+        dstPtr += 4u;
+    }
+    switch (size) {  // cases fall through on purpose to fill the remaining bytes
+        case 3:
+            dstPtr[2] = val;  // fall through
+        case 2:
+            dstPtr[1] = val;  // fall through
+        case 1:
+            dstPtr[0] = val;
+    }
+
+    return dst;
+}
+static inline __device__ void* memcpy(void* dst, const void* src, size_t size) {
+    return __hip_hc_memcpy(dst, src, size);
+}
+
+static inline __device__ void* memset(void* ptr, int val, size_t size) {
+    unsigned char val8 = static_cast<unsigned char>(val);  // only the low byte is used
+    return __hip_hc_memset(ptr, val8, size);
+}
+
+#endif
diff --git a/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h b/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h
new file mode 100644
index 000000000..2bf3c8cc5
--- /dev/null
+++ b/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h
@@ -0,0 +1,118 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+ * @file hcc_detail/device_library_decls.h
+ * @brief Contains declarations for types and functions in device library.
+ */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H + +#include "hip/hcc_detail/host_defines.h" + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int); +extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int); +extern "C" __device__ uint __ockl_activelane_u32(void); + +extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); + +extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); +extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); +extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint); +extern "C" __device__ __attribute__((const)) ulong __ockl_clz_u64(ulong); + +extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float); + +extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); + +// Introduce local address space +#define __local __attribute__((address_space(3))) + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } +#endif //__HIP_DEVICE_COMPILE__ + +#if defined(__HCC__) && (__hcc_minor__ < 3) +// __llvm_fence* 
functions from device-libs/irif/src/fence.ll +extern "C" __device__ void __llvm_fence_acq_sg(void); +extern "C" __device__ void __llvm_fence_acq_wg(void); +extern "C" __device__ void __llvm_fence_acq_dev(void); +extern "C" __device__ void __llvm_fence_acq_sys(void); + +extern "C" __device__ void __llvm_fence_rel_sg(void); +extern "C" __device__ void __llvm_fence_rel_wg(void); +extern "C" __device__ void __llvm_fence_rel_dev(void); +extern "C" __device__ void __llvm_fence_rel_sys(void); + +extern "C" __device__ void __llvm_fence_ar_sg(void); +extern "C" __device__ void __llvm_fence_ar_wg(void); +extern "C" __device__ void __llvm_fence_ar_dev(void); +extern "C" __device__ void __llvm_fence_ar_sys(void); + + +extern "C" __device__ void __llvm_fence_sc_sg(void); +extern "C" __device__ void __llvm_fence_sc_wg(void); +extern "C" __device__ void __llvm_fence_sc_dev(void); +extern "C" __device__ void __llvm_fence_sc_sys(void); +#else +// Using hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; + +typedef enum __memory_scope { + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} __memory_scope; + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum __memory_order +{ + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST +} __memory_order; + +// Linked from hip.amdgcn.bc +extern "C" __device__ void +__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/driver_types.h 
b/src/utils/amd_hip/hip/hcc_detail/driver_types.h new file mode 100644 index 000000000..8e1fec11f --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/driver_types.h @@ -0,0 +1,314 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H + +#ifndef __cplusplus +#include +#endif + +typedef void* hipDeviceptr_t; +typedef enum hipChannelFormatKind { + hipChannelFormatKindSigned = 0, + hipChannelFormatKindUnsigned = 1, + hipChannelFormatKindFloat = 2, + hipChannelFormatKindNone = 3 +}hipChannelFormatKind; + +typedef struct hipChannelFormatDesc { + int x; + int y; + int z; + int w; + enum hipChannelFormatKind f; +}hipChannelFormatDesc; + +#define HIP_TRSF_NORMALIZED_COORDINATES 0x01 +#define HIP_TRSF_READ_AS_INTEGER 0x00 +#define HIP_TRSA_OVERRIDE_FORMAT 0x01 + +typedef enum hipArray_Format { + HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, + HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, + HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, + HIP_AD_FORMAT_SIGNED_INT8 = 0x08, + HIP_AD_FORMAT_SIGNED_INT16 = 0x09, + HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, + HIP_AD_FORMAT_HALF = 0x10, + HIP_AD_FORMAT_FLOAT = 0x20 +}hipArray_Format; + +typedef struct HIP_ARRAY_DESCRIPTOR { + enum hipArray_Format format; + unsigned int numChannels; + size_t width; + size_t height; + unsigned int flags; + size_t depth; +}HIP_ARRAY_DESCRIPTOR; + +typedef struct hipArray { + void* data; // FIXME: generalize this + struct hipChannelFormatDesc desc; + unsigned int type; + unsigned int width; + unsigned int height; + unsigned int depth; + struct HIP_ARRAY_DESCRIPTOR drvDesc; + bool isDrv; + unsigned int textureType; +}hipArray; + +typedef struct hip_Memcpy2D { + size_t height; + size_t widthInBytes; + hipArray* dstArray; + hipDeviceptr_t dstDevice; + void* dstHost; + hipMemoryType dstMemoryType; + size_t dstPitch; + size_t dstXInBytes; + size_t dstY; + hipArray* srcArray; + hipDeviceptr_t srcDevice; + const void* srcHost; + hipMemoryType srcMemoryType; + size_t srcPitch; + size_t srcXInBytes; + size_t srcY; +} hip_Memcpy2D; + + +typedef struct hipArray* hipArray_t; + +typedef const struct hipArray* hipArray_const_t; + +// TODO: It needs to be modified since it was 
just copied from hipArray. +struct hipMipmappedArray { + void* data; // FIXME: generalize this + struct hipChannelFormatDesc desc; + unsigned int width; + unsigned int height; + unsigned int depth; +}; + +typedef struct hipMipmappedArray* hipMipmappedArray_t; + +typedef const struct hipMipmappedArray* hipMipmappedArray_const_t; + +/** + * hip resource types + */ +typedef enum hipResourceType { + hipResourceTypeArray = 0x00, + hipResourceTypeMipmappedArray = 0x01, + hipResourceTypeLinear = 0x02, + hipResourceTypePitch2D = 0x03 +}hipResourceType; + +/** + * hip texture resource view formats + */ +typedef enum hipResourceViewFormat { + hipResViewFormatNone = 0x00, + hipResViewFormatUnsignedChar1 = 0x01, + hipResViewFormatUnsignedChar2 = 0x02, + hipResViewFormatUnsignedChar4 = 0x03, + hipResViewFormatSignedChar1 = 0x04, + hipResViewFormatSignedChar2 = 0x05, + hipResViewFormatSignedChar4 = 0x06, + hipResViewFormatUnsignedShort1 = 0x07, + hipResViewFormatUnsignedShort2 = 0x08, + hipResViewFormatUnsignedShort4 = 0x09, + hipResViewFormatSignedShort1 = 0x0a, + hipResViewFormatSignedShort2 = 0x0b, + hipResViewFormatSignedShort4 = 0x0c, + hipResViewFormatUnsignedInt1 = 0x0d, + hipResViewFormatUnsignedInt2 = 0x0e, + hipResViewFormatUnsignedInt4 = 0x0f, + hipResViewFormatSignedInt1 = 0x10, + hipResViewFormatSignedInt2 = 0x11, + hipResViewFormatSignedInt4 = 0x12, + hipResViewFormatHalf1 = 0x13, + hipResViewFormatHalf2 = 0x14, + hipResViewFormatHalf4 = 0x15, + hipResViewFormatFloat1 = 0x16, + hipResViewFormatFloat2 = 0x17, + hipResViewFormatFloat4 = 0x18, + hipResViewFormatUnsignedBlockCompressed1 = 0x19, + hipResViewFormatUnsignedBlockCompressed2 = 0x1a, + hipResViewFormatUnsignedBlockCompressed3 = 0x1b, + hipResViewFormatUnsignedBlockCompressed4 = 0x1c, + hipResViewFormatSignedBlockCompressed4 = 0x1d, + hipResViewFormatUnsignedBlockCompressed5 = 0x1e, + hipResViewFormatSignedBlockCompressed5 = 0x1f, + hipResViewFormatUnsignedBlockCompressed6H = 0x20, + 
hipResViewFormatSignedBlockCompressed6H = 0x21, + hipResViewFormatUnsignedBlockCompressed7 = 0x22 +}hipResourceViewFormat; + +/** + * HIP resource descriptor + */ +typedef struct hipResourceDesc { + enum hipResourceType resType; + + union { + struct { + hipArray_t array; + } array; + struct { + hipMipmappedArray_t mipmap; + } mipmap; + struct { + void* devPtr; + struct hipChannelFormatDesc desc; + size_t sizeInBytes; + } linear; + struct { + void* devPtr; + struct hipChannelFormatDesc desc; + size_t width; + size_t height; + size_t pitchInBytes; + } pitch2D; + } res; +}hipResourceDesc; + +/** + * hip resource view descriptor + */ +struct hipResourceViewDesc { + enum hipResourceViewFormat format; + size_t width; + size_t height; + size_t depth; + unsigned int firstMipmapLevel; + unsigned int lastMipmapLevel; + unsigned int firstLayer; + unsigned int lastLayer; +}; + +/** + * Memory copy types + * + */ +typedef enum hipMemcpyKind { + hipMemcpyHostToHost = 0, ///< Host-to-Host Copy + hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy + hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy + hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy + hipMemcpyDefault = + 4 ///< Runtime will automatically determine copy-kind based on virtual addresses. 
+} hipMemcpyKind; + +typedef struct hipPitchedPtr { + void* ptr; + size_t pitch; + size_t xsize; + size_t ysize; +}hipPitchedPtr; + +typedef struct hipExtent { + size_t width; // Width in elements when referring to array memory, in bytes when referring to + // linear memory + size_t height; + size_t depth; +}hipExtent; + +typedef struct hipPos { + size_t x; + size_t y; + size_t z; +}hipPos; + +typedef struct hipMemcpy3DParms { + hipArray_t srcArray; + struct hipPos srcPos; + struct hipPitchedPtr srcPtr; + + hipArray_t dstArray; + struct hipPos dstPos; + struct hipPitchedPtr dstPtr; + + struct hipExtent extent; + enum hipMemcpyKind kind; + + size_t Depth; + size_t Height; + size_t WidthInBytes; + hipDeviceptr_t dstDevice; + size_t dstHeight; + void* dstHost; + size_t dstLOD; + hipMemoryType dstMemoryType; + size_t dstPitch; + size_t dstXInBytes; + size_t dstY; + size_t dstZ; + void* reserved0; + void* reserved1; + hipDeviceptr_t srcDevice; + size_t srcHeight; + const void* srcHost; + size_t srcLOD; + hipMemoryType srcMemoryType; + size_t srcPitch; + size_t srcXInBytes; + size_t srcY; + size_t srcZ; +}hipMemcpy3DParms; + +static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, + size_t ysz) { + struct hipPitchedPtr s; + + s.ptr = d; + s.pitch = p; + s.xsize = xsz; + s.ysize = ysz; + + return s; +} + +static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) { + struct hipPos p; + + p.x = x; + p.y = y; + p.z = z; + + return p; +} + +static inline struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) { + struct hipExtent e; + + e.width = w; + e.height = h; + e.depth = d; + + return e; +} + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp new file mode 100644 index 000000000..2fbda4862 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp @@ -0,0 +1,158 @@ +/* +Copyright (c) 2015 - present Advanced Micro 
Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "code_object_bundle.hpp" +#include "concepts.hpp" +#include "helpers.hpp" +#include "program_state.hpp" + +#include "hc.hpp" +#include "hip/hip_hcc.h" +#include "hip_runtime.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace hip_impl { +template {}>::type* = nullptr> +inline T round_up_to_next_multiple_nonnegative(T x, T y) { + T tmp = x + y - 1; + return tmp - tmp % y; +} + +template < + std::size_t n, + typename... Ts, + typename std::enable_if::type* = nullptr> +inline std::vector make_kernarg( + const std::tuple&, + const std::vector>&, + std::vector kernarg) { + return kernarg; +} + +template < + std::size_t n, + typename... 
Ts, + typename std::enable_if::type* = nullptr> +inline std::vector make_kernarg( + const std::tuple& formals, + const std::vector>& size_align, + std::vector kernarg) { + using T = typename std::tuple_element>::type; + + static_assert( + !std::is_reference{}, + "A __global__ function cannot have a reference as one of its " + "arguments."); + #if defined(HIP_STRICT) + static_assert( + std::is_trivially_copyable{}, + "Only TriviallyCopyable types can be arguments to a __global__ " + "function"); + #endif + + kernarg.resize(round_up_to_next_multiple_nonnegative( + kernarg.size(), size_align[n].second) + size_align[n].first); + + std::memcpy( + kernarg.data() + kernarg.size() - size_align[n].first, + &std::get(formals), + size_align[n].first); + + return make_kernarg(formals, size_align, std::move(kernarg)); +} + +template +inline std::vector make_kernarg( + void (*kernel)(Formals...), std::tuple actuals) { + static_assert(sizeof...(Formals) == sizeof...(Actuals), + "The count of formal arguments must match the count of actuals."); + + if (sizeof...(Formals) == 0) return {}; + + auto it = function_names().find(reinterpret_cast(kernel)); + if (it == function_names().cend()) { + it = + function_names(true).find(reinterpret_cast(kernel)); + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } + } + + auto it1 = kernargs().find(it->second); + if (it1 == kernargs().end()) { + it1 = kernargs(true).find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } + } + + std::tuple to_formals{std::move(actuals)}; + std::vector kernarg; + kernarg.reserve(sizeof(to_formals)); + + return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); +} + +void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, + const dim3& dimBlocks, std::uint32_t sharedMemBytes, hipStream_t stream, + void** kernarg); +} // Namespace 
hip_impl. + +template +inline void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { + auto kernarg = hip_impl::make_kernarg( + kernel, std::tuple{std::move(args)...}); + std::size_t kernarg_size = kernarg.size(); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, kernarg.data(), HIP_LAUNCH_PARAM_BUFFER_SIZE, + &kernarg_size, HIP_LAUNCH_PARAM_END}; + + hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast(kernel), numBlocks, dimBlocks, + sharedMemBytes, stream, &config[0]); +} + +template +[[deprecated("hipLaunchKernel is deprecated and will be removed in the next " + "version of HIP; please upgrade to hipLaunchKernelGGL.")]] +inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t groupMemBytes, hipStream_t stream, Args... args) { + hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, + std::move(args)...); +} \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch.h b/src/utils/amd_hip/hip/hcc_detail/grid_launch.h new file mode 100644 index 000000000..61fd9bdbe --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/grid_launch.h @@ -0,0 +1,69 @@ +#pragma once + +#include + +#include + +#define GRID_LAUNCH_VERSION 20 + +// Extern definitions +namespace hc{ +class completion_future; +class accelerator_view; +} + + +// 3 dim structure for groups and grids. +typedef struct gl_dim3 +{ + int x,y,z; + gl_dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {}; +} gl_dim3; + +typedef enum gl_barrier_bit { + barrier_bit_queue_default, + barrier_bit_none, + barrier_bit_wait, +} gl_barrier_bit; + + +// grid_launch_parm contains information used to launch the kernel. +typedef struct grid_launch_parm +{ + //! Grid dimensions + gl_dim3 grid_dim; + + //! Group dimensions + gl_dim3 group_dim; + + //! Amount of dynamic group memory to use with the kernel launch. 
+ //! This memory is in addition to the amount used statically in the kernel. + unsigned int dynamic_group_mem_bytes; + + //! Control setting of barrier bit on per-packet basis: + //! See gl_barrier_bit description. + //! Placeholder, is not used to control packet dispatch yet + enum gl_barrier_bit barrier_bit; + + //! Value of packet fences to apply to launch. + //! The correspond to the value of bits 9:14 in the AQL packet, + //! see HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE and hsa_fence_scope_t. + //! Set to -1 for conservative defaults. + //! Placeholder, is not used to control packet dispatch yet + unsigned int launch_fence; + + //! Pointer to the accelerator_view where the kernel should execute. + //! If NULL, the default view on the default accelerator is used. + hc::accelerator_view *av; + + //! Pointer to the completion_future used to track the status of the command. + //! If NULL, the command does not write status. In this case, + //! synchronization can be enforced with queue-level waits or + //! waiting on younger commands. 
+ hc::completion_future *cf; + + grid_launch_parm() = default; +} grid_launch_parm; + + +extern void init_grid_launch(grid_launch_parm *gl); diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp new file mode 100644 index 000000000..04ce7e036 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp @@ -0,0 +1,50 @@ +#pragma once + +#include "grid_launch.h" +#include "hc.hpp" + +class grid_launch_parm_cxx : public grid_launch_parm +{ +public: + grid_launch_parm_cxx() = default; + + // customized serialization: don't need av and cf in kernel + __attribute__((annotate("serialize"))) + void __cxxamp_serialize(Kalmar::Serialize& s) const { + s.Append(sizeof(int), &grid_dim.x); + s.Append(sizeof(int), &grid_dim.y); + s.Append(sizeof(int), &grid_dim.z); + s.Append(sizeof(int), &group_dim.x); + s.Append(sizeof(int), &group_dim.y); + s.Append(sizeof(int), &group_dim.z); + } + + __attribute__((annotate("user_deserialize"))) + grid_launch_parm_cxx(int grid_dim_x, int grid_dim_y, int grid_dim_z, + int group_dim_x, int group_dim_y, int group_dim_z) { + grid_dim.x = grid_dim_x; + grid_dim.y = grid_dim_y; + grid_dim.z = grid_dim_z; + group_dim.x = group_dim_x; + group_dim.y = group_dim_y; + group_dim.z = group_dim_z; + } +}; + + +extern inline void grid_launch_init(grid_launch_parm *lp) { + lp->grid_dim.x = lp->grid_dim.y = lp->grid_dim.z = 1; + + lp->group_dim.x = lp->group_dim.y = lp->group_dim.z = 1; + + lp->dynamic_group_mem_bytes = 0; + + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; + + // TODO - set to NULL? 
+ static hc::accelerator_view av = hc::accelerator().get_default_view(); + lp->av = &av; + lp->cf = NULL; +} + diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp b/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp new file mode 100644 index 000000000..1c05279e0 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp @@ -0,0 +1,30 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#pragma once + +#if GENERIC_GRID_LAUNCH == 1 +#if __hcc_workweek__ >= 17481 +#include "functional_grid_launch.hpp" +#else +#include "macro_based_grid_launch.hpp" +#endif +#endif // GENERIC_GRID_LAUNCH \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/helpers.hpp b/src/utils/amd_hip/hip/hcc_detail/helpers.hpp new file mode 100644 index 000000000..1916945c1 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/helpers.hpp @@ -0,0 +1,110 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "concepts.hpp" + +#include // For std::conditional, std::decay, std::enable_if, + // std::false_type, std result_of and std::true_type. +#include // For std::declval. + +namespace std { // TODO: these should be removed as soon as possible. 
+#if (__cplusplus < 201406L) +#if (__cplusplus < 201402L) +template +using enable_if_t = typename enable_if::type; +template +using conditional_t = typename conditional::type; +template +using decay_t = typename decay::type; +template +using result_of_t = typename result_of::type; +template +using remove_reference_t = typename remove_reference::type; +#endif +#endif +} // namespace std + +namespace hip_impl { +template +using void_t_ = void; + +#if (__cplusplus < 201402L) +template +struct is_callable_impl : is_callable_impl {}; + +// Pointer to member function, call through non-pointer. +template +struct is_callable_impl< + F(C, Ts...), 0u, + void_t_().*std::declval())(std::declval()...))> > + : std::true_type {}; + +// Pointer to member function, call through pointer. +template +struct is_callable_impl< + F(C, Ts...), 1u, + void_t_()).*std::declval())(std::declval()...))> > + : std::true_type {}; + +// Pointer to member data, call through non-pointer, no args. +template +struct is_callable_impl().*std::declval())> > + : std::true_type {}; + +// Pointer to member data, call through pointer, no args. +template +struct is_callable_impl().*std::declval())> > + : std::true_type {}; + +// General call, n args. +template +struct is_callable_impl()(std::declval()...))> > + : std::true_type {}; + +// Not callable. +template +struct is_callable_impl : std::false_type {}; +#else +template +struct is_callable_impl : std::false_type {}; + +template +struct is_callable_impl > > : std::true_type {}; +#endif +template +struct is_callable : is_callable_impl {}; + +#define count_macro_args_impl_hip_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ + _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, \ + _26, _27, _28, _29, _30, _31, _n, ...) \ + _n +#define count_macro_args_hip_(...) 
\ + count_macro_args_impl_hip_(, ##__VA_ARGS__, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \ + 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, \ + 0) + +#define overloaded_macro_expand_hip_(macro, arg_cnt) macro##arg_cnt +#define overload_macro_impl_hip_(macro, arg_cnt) overloaded_macro_expand_hip_(macro, arg_cnt) +#define overload_macro_hip_(macro, ...) \ + overload_macro_impl_hip_(macro, count_macro_args_hip_(__VA_ARGS__))(__VA_ARGS__) +} // namespace hip_impl diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h b/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h new file mode 100644 index 000000000..a5ac94a74 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h @@ -0,0 +1,269 @@ +#pragma once + +#include "device_functions.h" + +__device__ +inline +int atomicCAS(int* address, int compare, int val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} +__device__ +inline +unsigned int atomicCAS( + unsigned int* address, unsigned int compare, unsigned int val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} +__device__ +inline +unsigned long long atomicCAS( + unsigned long long* address, + unsigned long long compare, + unsigned long long val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} + +__device__ +inline +int atomicAdd(int* address, int val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicAdd(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicAdd( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +float atomicAdd(float* address, float val) +{ + 
unsigned int* uaddr{reinterpret_cast(address)}; + unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + unsigned int r; + + do { + r = old; + old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r))); + } while (r != old); + + return __uint_as_float(r); +} +__device__ +inline +double atomicAdd(double* address, double val) +{ + unsigned long long* uaddr{reinterpret_cast(address)}; + unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + unsigned long long r; + + do { + r = old; + old = atomicCAS( + uaddr, r, __double_as_longlong(val + __longlong_as_double(r))); + } while (r != old); + + return __longlong_as_double(r); +} + +__device__ +inline +int atomicSub(int* address, int val) +{ + return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicSub(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicExch(int* address, int val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicExch(unsigned int* address, unsigned int val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicExch(unsigned long long* address, unsigned long long val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +float atomicExch(float* address, float val) +{ + return __uint_as_float(__atomic_exchange_n( + reinterpret_cast(address), + __float_as_uint(val), + __ATOMIC_RELAXED)); +} + +__device__ +inline +int atomicMin(int* address, int val) +{ + return __sync_fetch_and_min(address, val); +} +__device__ +inline +unsigned int atomicMin(unsigned int* address, unsigned int val) +{ + return __sync_fetch_and_umin(address, val); +} +__device__ +inline +unsigned long long atomicMin( + unsigned long long* address, unsigned long long val) +{ + unsigned long long tmp{__atomic_load_n(address, 
__ATOMIC_RELAXED)}; + while (val < tmp) { tmp = atomicCAS(address, tmp, val); } + + return tmp; +} + +__device__ +inline +int atomicMax(int* address, int val) +{ + return __sync_fetch_and_max(address, val); +} +__device__ +inline +unsigned int atomicMax(unsigned int* address, unsigned int val) +{ + return __sync_fetch_and_umax(address, val); +} +__device__ +inline +unsigned long long atomicMax( + unsigned long long* address, unsigned long long val) +{ + unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; + while (tmp < val) { tmp = atomicCAS(address, tmp, val); } + + return tmp; +} + +__device__ +inline +unsigned int atomicInc(unsigned int* address, unsigned int val) +{ + __device__ + extern + unsigned int __builtin_amdgcn_atomic_inc( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32"); + + return __builtin_amdgcn_atomic_inc( + address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); +} + +__device__ +inline +unsigned int atomicDec(unsigned int* address, unsigned int val) +{ + __device__ + extern + unsigned int __builtin_amdgcn_atomic_dec( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32"); + + return __builtin_amdgcn_atomic_dec( + address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); +} + +__device__ +inline +int atomicAnd(int* address, int val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicAnd(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicAnd( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicOr(int* address, int val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicOr(unsigned int* 
address, unsigned int val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicOr( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicXor(int* address, int val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicXor(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicXor( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} + +// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block. diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_complex.h b/src/utils/amd_hip/hip/hcc_detail/hip_complex.h new file mode 100644 index 000000000..d19abd724 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_complex.h @@ -0,0 +1,356 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H + +#include "hip/hcc_detail/hip_vector_types.h" + +// TODO: Clang has a bug which allows device functions to call std functions +// when std functions are introduced into default namespace by using statement. +// math.h may be included after this bug is fixed. +#if __cplusplus +#include +#else +#include "math.h" +#endif + +#if __cplusplus +#define COMPLEX_NEG_OP_OVERLOAD(type) \ + __device__ __host__ static inline type operator-(const type& op) { \ + type ret; \ + ret.x = -op.x; \ + ret.y = -op.y; \ + return ret; \ + } + +#define COMPLEX_EQ_OP_OVERLOAD(type) \ + __device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \ + return lhs.x == rhs.x && lhs.y == rhs.y; \ + } + +#define COMPLEX_NE_OP_OVERLOAD(type) \ + __device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \ + return !(lhs == rhs); \ + } + +#define COMPLEX_ADD_OP_OVERLOAD(type) \ + __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x + rhs.x; \ + ret.y = lhs.y + rhs.y; \ + return ret; \ + } + +#define COMPLEX_SUB_OP_OVERLOAD(type) \ + __device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x - rhs.x; \ + ret.y = lhs.y - rhs.y; \ + return ret; \ + } + +#define COMPLEX_MUL_OP_OVERLOAD(type) \ + __device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ + ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ + return ret; \ + } + +#define COMPLEX_DIV_OP_OVERLOAD(type) \ + __device__ __host__ 
static inline type operator/(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ + ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ + ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y); \ + ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y); \ + return ret; \ + } + +#define COMPLEX_ADD_PREOP_OVERLOAD(type) \ + __device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \ + lhs.x += rhs.x; \ + lhs.y += rhs.y; \ + return lhs; \ + } + +#define COMPLEX_SUB_PREOP_OVERLOAD(type) \ + __device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \ + lhs.x -= rhs.x; \ + lhs.y -= rhs.y; \ + return lhs; \ + } + +#define COMPLEX_MUL_PREOP_OVERLOAD(type) \ + __device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \ + lhs = lhs * rhs; \ + return lhs; \ + } + +#define COMPLEX_DIV_PREOP_OVERLOAD(type) \ + __device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \ + lhs = lhs / rhs; \ + return lhs; \ + } + +#define COMPLEX_SCALAR_PRODUCT(type, type1) \ + __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ + type ret; \ + ret.x = lhs.x * rhs; \ + ret.y = lhs.y * rhs; \ + return ret; \ + } +#define MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ComplexT, T) \ + explicit __device__ __host__ ComplexT(T val) : x(val), y(val) {} \ + __device__ __host__ ComplexT(T val1, T val2) : x(val1), y(val2) {} + +#endif + +struct hipFloatComplex { +#ifdef __cplusplus + public: + typedef float value_type; + __device__ __host__ hipFloatComplex() : x(0.0f), y(0.0f) {} + explicit __device__ __host__ hipFloatComplex(float x) : x(x), y(0.0f) {} + __device__ __host__ hipFloatComplex(float x, float y) : x(x), y(y) {} + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned short) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed short) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned int) + 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed int) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, double) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long long) +#endif + float x, y; +} __attribute__((aligned(8))); + +struct hipDoubleComplex { +#ifdef __cplusplus + public: + typedef double value_type; + __device__ __host__ hipDoubleComplex() : x(0.0f), y(0.0f) {} + explicit __device__ __host__ hipDoubleComplex(double x) : x(x), y(0.0f) {} + __device__ __host__ hipDoubleComplex(double x, double y) : x(x), y(y) {} + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned short) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed short) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned int) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed int) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, float) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long long) + MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long long) +#endif + double x, y; +} __attribute__((aligned(16))); + +#if __cplusplus + +COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex) +COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex) +COMPLEX_NE_OP_OVERLOAD(hipFloatComplex) +COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex) +COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex) +COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex) +COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex) +COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex) 
+COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long) + +COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) + +#endif + +__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } + +__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } + +__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { + hipFloatComplex z; + z.x = a; + z.y = b; + return z; +} + +__device__ __host__ 
static inline hipFloatComplex hipConjf(hipFloatComplex z) { + hipFloatComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; +} + +__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { + return z.x * z.x + z.y * z.y; +} + +__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x + q.x, p.y + q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x - q.x, p.y - q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { + float sqabs = hipCsqabsf(q); + hipFloatComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } + +__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } + +__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } + +__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { + hipDoubleComplex z; + z.x = a; + z.y = b; + return z; +} + +__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { /* complex conjugate, double precision */ + hipDoubleComplex ret; + ret.x = z.x; + ret.y = -z.y; /* negate the imaginary part, matching hipConjf; previously copied unchanged */ + return ret; +} + +__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { + return z.x * z.x + z.y * z.y; +} + +__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x + q.x, p.y + q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { + return
make_hipDoubleComplex(p.x - q.x, p.y - q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { + double sqabs = hipCsqabs(q); + hipDoubleComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); } /* double-precision sqrt; sqrtf truncated the result to float */ + +typedef hipFloatComplex hipComplex; + +__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) { + return make_hipFloatComplex(x, y); +} + +__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { + return make_hipFloatComplex((float)z.x, (float)z.y); +} + +__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { + return make_hipDoubleComplex((double)z.x, (double)z.y); +} + +__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { + float real = (p.x * q.x) + r.x; + float imag = (q.x * p.y) + r.y; + + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; + + return make_hipComplex(real, imag); +} + +__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, + hipDoubleComplex r) { /* computes p * q + r in double precision */ + double real = (p.x * q.x) + r.x; /* accumulators are double; float here discarded precision */ + double imag = (q.x * p.y) + r.y; + + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; + + return make_hipDoubleComplex(real, imag); +} + +// Complex functions returning real numbers.
+#define __DEFINE_HIP_COMPLEX_REAL_FUN(func, hipFun) \ +__device__ __host__ inline float func(const hipFloatComplex& z) { return hipFun##f(z); } \ +__device__ __host__ inline double func(const hipDoubleComplex& z) { return hipFun(z); } + +__DEFINE_HIP_COMPLEX_REAL_FUN(abs, hipCabs) +__DEFINE_HIP_COMPLEX_REAL_FUN(real, hipCreal) +__DEFINE_HIP_COMPLEX_REAL_FUN(imag, hipCimag) + +// Complex functions returning complex numbers. +#define __DEFINE_HIP_COMPLEX_FUN(func, hipFun) \ +__device__ __host__ inline hipFloatComplex func(const hipFloatComplex& z) { return hipFun##f(z); } \ +__device__ __host__ inline hipDoubleComplex func(const hipDoubleComplex& z) { return hipFun(z); } + +__DEFINE_HIP_COMPLEX_FUN(conj, hipConj) + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_db.h b/src/utils/amd_hip/hip/hcc_detail/hip_db.h new file mode 100644 index 000000000..91ff54d3a --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_db.h @@ -0,0 +1,21 @@ +/** + * @defgroup HipDb HCC-specific debug facilities + * @{ + */ + + +/** + * @brief * Print memory tracker information for this pointer. + * + * HIP maintains a table for all memory allocations performed by the application. + * If targetAddress is 0, the entire table is printed to stderr. + * If targetAddress is non-null, this routine will perform some forensic analysis + * to find the pointer + */ +void hipdbPrintMem(void* targetAddress); + + +// doxygen end HipDb +/** + * @} + */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h new file mode 100644 index 000000000..849b7278d --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h @@ -0,0 +1,1645 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "hip/hcc_detail/host_defines.h" +#include +#if defined(__cplusplus) + #include + #include + #include +#endif + +#if defined(__clang__) && (__clang_major__ > 5) + typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); + + struct __half_raw { + union { + static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); + + _Float16 data; + unsigned short x; + }; + }; + + struct __half2_raw { + union { + static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); + + _Float16_2 data; + struct { + unsigned short x; + unsigned short y; + }; + }; + }; + + #if defined(__cplusplus) + #include "hip_fp16_math_fwd.h" + #include "hip_vector_types.h" + #include "host_defines.h" + + namespace std + { + template<> struct is_floating_point<_Float16> : std::true_type {}; + } + + template + using Enable_if_t = typename std::enable_if::type; + + // BEGIN STRUCT __HALF + struct __half { + protected: + union { + static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); + + _Float16 data; + unsigned short __x; + }; + public: + // CREATORS + __host__ __device__ + __half() = default; + __host__ __device__ + __half(const __half_raw& x) : data{x.data} {} + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + __host__ __device__ + __half(decltype(data) x) : data{x} {} + template< + typename T, + Enable_if_t{}>* = nullptr> + __host__ __device__ + __half(T x) : data{static_cast<_Float16>(x)} {} + #endif + __host__ __device__ + __half(const __half&) = default; + __host__ __device__ + __half(__half&&) = default; + __host__ __device__ + ~__half() = default; + + // CREATORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __device__ + __half(T x) : data{static_cast<_Float16>(x)} {} + #endif + + // MANIPULATORS + __host__ __device__ + __half& operator=(const __half&) = default; + __host__ __device__ + __half& operator=(__half&&) = default; + __host__ __device__ + __half& operator=(const 
__half_raw& x) + { + data = x.data; + return *this; + } + __host__ __device__ + volatile __half& operator=(const __half_raw& x) volatile + { + data = x.data; + return *this; + } + volatile __half& operator=(const volatile __half_raw& x) volatile + { + data = x.data; + return *this; + } + __half& operator=(__half_raw&& x) + { + data = x.data; + return *this; + } + volatile __half& operator=(__half_raw&& x) volatile + { + data = x.data; + return *this; + } + volatile __half& operator=(volatile __half_raw&& x) volatile + { + data = x.data; + return *this; + } + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, + Enable_if_t{}>* = nullptr> + __host__ __device__ + __half& operator=(T x) + { + data = static_cast<_Float16>(x); + return *this; + } + #endif + + // MANIPULATORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __device__ + __half& operator=(T x) + { + data = static_cast<_Float16>(x); + return *this; + } + #endif + + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half& operator+=(const __half& x) + { + data += x.data; + return *this; + } + __device__ + __half& operator-=(const __half& x) + { + data -= x.data; + return *this; + } + __device__ + __half& operator*=(const __half& x) + { + data *= x.data; + return *this; + } + __device__ + __half& operator/=(const __half& x) + { + data /= x.data; + return *this; + } + __device__ + __half& operator++() { ++data; return *this; } + __device__ + __half operator++(int) + { + __half tmp{*this}; + ++*this; + return tmp; + } + __device__ + __half& operator--() { --data; return *this; } + __device__ + __half operator--(int) + { + __half tmp{*this}; + --*this; + return tmp; + } + #endif + + // ACCESSORS + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, + Enable_if_t< + std::is_floating_point{} && + !std::is_same{}>* = nullptr> + operator T() const { return data; } + #endif + __host__ __device__ + operator 
__half_raw() const { return __half_raw{data}; } + __host__ __device__ + operator volatile __half_raw() const volatile + { + return __half_raw{data}; + } + + // ACCESSORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __device__ + operator T() const { return data; } + #endif + + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half operator+() const { return *this; } + __device__ + __half operator-() const + { + __half tmp{*this}; + tmp.data = -tmp.data; + return tmp; + } + #endif + + // FRIENDS + #if !defined(__HIP_NO_HALF_OPERATORS__) + friend + inline + __device__ + __half operator+(const __half& x, const __half& y) + { + return __half{x} += y; + } + friend + inline + __device__ + __half operator-(const __half& x, const __half& y) + { + return __half{x} -= y; + } + friend + inline + __device__ + __half operator*(const __half& x, const __half& y) + { + return __half{x} *= y; + } + friend + inline + __device__ + __half operator/(const __half& x, const __half& y) + { + return __half{x} /= y; + } + friend + inline + __device__ + bool operator==(const __half& x, const __half& y) + { + return x.data == y.data; + } + friend + inline + __device__ + bool operator!=(const __half& x, const __half& y) + { + return !(x == y); + } + friend + inline + __device__ + bool operator<(const __half& x, const __half& y) + { + return x.data < y.data; + } + friend + inline + __device__ + bool operator>(const __half& x, const __half& y) + { + return y.data < x.data; + } + friend + inline + __device__ + bool operator<=(const __half& x, const __half& y) + { + return !(y < x); + } + friend + inline + __device__ + bool operator>=(const __half& x, const __half& y) + { + return !(x < y); + } + #endif // !defined(__HIP_NO_HALF_OPERATORS__) + }; + // END STRUCT __HALF + + // BEGIN STRUCT __HALF2 + struct __half2 { + protected: + union { + static_assert( + sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); + + _Float16_2 
data; + struct { + unsigned short x; + unsigned short y; + }; + }; + public: + // CREATORS + __host__ __device__ + __half2() = default; + __host__ __device__ + __half2(const __half2_raw& x) : data{x.data} {} + __host__ __device__ + __half2(decltype(data) x) : data{x} {} + __host__ __device__ + __half2(const __half& x, const __half& y) + : + data{ + static_cast<__half_raw>(x).data, + static_cast<__half_raw>(y).data} + {} + __host__ __device__ + __half2(const __half2&) = default; + __host__ __device__ + __half2(__half2&&) = default; + __host__ __device__ + ~__half2() = default; + + // MANIPULATORS + __host__ __device__ + __half2& operator=(const __half2&) = default; + __host__ __device__ + __half2& operator=(__half2&&) = default; + __host__ __device__ + __half2& operator=(const __half2_raw& x) + { + data = x.data; + return *this; + } + + // MANIPULATORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half2& operator+=(const __half2& x) + { + data += x.data; + return *this; + } + __device__ + __half2& operator-=(const __half2& x) + { + data -= x.data; + return *this; + } + __device__ + __half2& operator*=(const __half2& x) + { + data *= x.data; + return *this; + } + __device__ + __half2& operator/=(const __half2& x) + { + data /= x.data; + return *this; + } + __device__ + __half2& operator++() { return *this += _Float16_2{1, 1}; } + __device__ + __half2 operator++(int) + { + __half2 tmp{*this}; + ++*this; + return tmp; + } + __device__ + __half2& operator--() { return *this -= _Float16_2{1, 1}; } + __device__ + __half2 operator--(int) + { + __half2 tmp{*this}; + --*this; + return tmp; + } + #endif + + // ACCESSORS + __host__ __device__ + operator decltype(data)() const { return data; } + __host__ __device__ + operator __half2_raw() const { return __half2_raw{data}; } + + // ACCESSORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half2 operator+() const { return *this; } + __device__ + __half2 operator-() const + { + 
__half2 tmp{*this}; + tmp.data = -tmp.data; + return tmp; + } + #endif + + // FRIENDS + #if !defined(__HIP_NO_HALF_OPERATORS__) + friend + inline + __device__ + __half2 operator+(const __half2& x, const __half2& y) + { + return __half2{x} += y; + } + friend + inline + __device__ + __half2 operator-(const __half2& x, const __half2& y) + { + return __half2{x} -= y; + } + friend + inline + __device__ + __half2 operator*(const __half2& x, const __half2& y) + { + return __half2{x} *= y; + } + friend + inline + __device__ + __half2 operator/(const __half2& x, const __half2& y) + { + return __half2{x} /= y; + } + friend + inline + __device__ + bool operator==(const __half2& x, const __half2& y) + { + auto r = x.data == y.data; + return r.x != 0 && r.y != 0; + } + friend + inline + __device__ + bool operator!=(const __half2& x, const __half2& y) + { + return !(x == y); + } + friend + inline + __device__ + bool operator<(const __half2& x, const __half2& y) + { + auto r = x.data < y.data; + return r.x != 0 && r.y != 0; + } + friend + inline + __device__ + bool operator>(const __half2& x, const __half2& y) + { + return y < x; + } + friend + inline + __device__ + bool operator<=(const __half2& x, const __half2& y) + { + return !(y < x); + } + friend + inline + __device__ + bool operator>=(const __half2& x, const __half2& y) + { + return !(x < y); + } + #endif // !defined(__HIP_NO_HALF_OPERATORS__) + }; + // END STRUCT __HALF2 + + namespace + { + inline + __host__ __device__ + __half2 make_half2(__half x, __half y) + { + return __half2{x, y}; + } + + inline + __device__ + __half __low2half(__half2 x) + { + return __half{__half_raw{static_cast<__half2_raw>(x).data.x}}; + } + + inline + __device__ + __half __high2half(__half2 x) + { + return __half{__half_raw{static_cast<__half2_raw>(x).data.y}}; + } + + inline + __device__ + __half2 __half2half2(__half x) + { + return __half2{x, x}; + } + + inline + __device__ + __half2 __halves2half2(__half x, __half y) + { + return __half2{x, 
y}; + } + + inline + __device__ + __half2 __low2half2(__half2 x) + { + return __half2{ + _Float16_2{ + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(x).data.x}}; + } + + inline + __device__ + __half2 __high2half2(__half2 x) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(x).data.y}}; + } + + inline + __device__ + __half2 __lows2half2(__half2 x, __half2 y) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(y).data.x}}; + } + + inline + __device__ + __half2 __highs2half2(__half2 x, __half2 y) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(y).data.y}}; + } + + inline + __device__ + __half2 __lowhigh2highlow(__half2 x) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(x).data.x}}; + } + + // Bitcasts + inline + __device__ + short __half_as_short(__half x) + { + return static_cast<__half_raw>(x).x; + } + + inline + __device__ + unsigned short __half_as_ushort(__half x) + { + return static_cast<__half_raw>(x).x; + } + + inline + __device__ + __half __short_as_half(short x) + { + __half_raw r; r.x = x; + return r; + } + + inline + __device__ + __half __ushort_as_half(unsigned short x) + { + __half_raw r; r.x = x; + return r; + } + + // TODO: rounding behaviour is not correct. 
+ // float -> half | half2 + inline + __device__ __host__ + __half __float2half(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ __host__ + __half __float2half_rn(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ __host__ + __half __float2half_rz(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ __host__ + __half __float2half_rd(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ __host__ + __half __float2half_ru(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ __host__ + __half2 __float2half2_rn(float x) + { + return __half2_raw{ + _Float16_2{ + static_cast<_Float16>(x), static_cast<_Float16>(x)}}; + } + inline + __device__ __host__ + __half2 __floats2half2_rn(float x, float y) + { + return __half2_raw{_Float16_2{ + static_cast<_Float16>(x), static_cast<_Float16>(y)}}; + } + inline + __device__ __host__ + __half2 __float22half2_rn(float2 x) + { + return __floats2half2_rn(x.x, x.y); + } + + // half | half2 -> float + inline + __device__ __host__ + float __half2float(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ __host__ + float __low2float(__half2 x) + { + return static_cast<__half2_raw>(x).data.x; + } + inline + __device__ __host__ + float __high2float(__half2 x) + { + return static_cast<__half2_raw>(x).data.y; + } + inline + __device__ __host__ + float2 __half22float2(__half2 x) + { + return make_float2( + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(x).data.y); + } + + // half -> int + inline + __device__ + int __half2int_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + int __half2int_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + int __half2int_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + int __half2int_ru(__half 
x) + { + return static_cast<__half_raw>(x).data; + } + + // int -> half + inline + __device__ + __half __int2half_rn(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_rz(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_rd(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_ru(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> short + inline + __device__ + short __half2short_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // short -> half + inline + __device__ + __half __short2half_rn(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_rz(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_rd(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_ru(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> long long + inline + __device__ + long long __half2ll_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // long long -> half + inline + __device__ + __half __ll2half_rn(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } 
+ inline + __device__ + __half __ll2half_rz(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ll2half_rd(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ll2half_ru(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned int + inline + __device__ + unsigned int __half2uint_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned int -> half + inline + __device__ + __half __uint2half_rn(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_rz(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_rd(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_ru(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned short + inline + __device__ + unsigned short __half2ushort_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned short -> half + inline + __device__ + __half __ushort2half_rn(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half 
__ushort2half_rz(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ushort2half_rd(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ushort2half_ru(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned long long + inline + __device__ + unsigned long long __half2ull_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned long long -> half + inline + __device__ + __half __ull2half_rn(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_rz(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_rd(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_ru(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // Load primitives + inline + __device__ + __half __ldg(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldcg(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldca(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldcs(const __half* ptr) { return *ptr; } + + inline + __device__ + __half2 __ldg(const __half2* ptr) { return *ptr; } + inline + __device__ + __half2 __ldcg(const __half2* ptr) { return *ptr; } + inline + __device__ + __half2 __ldca(const __half2* ptr) { return *ptr; } + inline + __device__ + __half2 __ldcs(const __half2* ptr) { return *ptr; } + + // Relations + 
inline + __device__ + bool __heq(__half x, __half y) + { + return static_cast<__half_raw>(x).data == + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hne(__half x, __half y) + { + return static_cast<__half_raw>(x).data != + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hle(__half x, __half y) + { + return static_cast<__half_raw>(x).data <= + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hge(__half x, __half y) + { + return static_cast<__half_raw>(x).data >= + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hlt(__half x, __half y) + { + return static_cast<__half_raw>(x).data < + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hgt(__half x, __half y) + { + return static_cast<__half_raw>(x).data > + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hequ(__half x, __half y) { return __heq(x, y); } + inline + __device__ + bool __hneu(__half x, __half y) { return __hne(x, y); } + inline + __device__ + bool __hleu(__half x, __half y) { return __hle(x, y); } + inline + __device__ + bool __hgeu(__half x, __half y) { return __hge(x, y); } + inline + __device__ + bool __hltu(__half x, __half y) { return __hlt(x, y); } + inline + __device__ + bool __hgtu(__half x, __half y) { return __hgt(x, y); } + + inline + __device__ + __half2 __heq2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data == + static_cast<__half2_raw>(y).data; + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hne2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data != + static_cast<__half2_raw>(y).data; + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hle2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data <= + static_cast<__half2_raw>(y).data; + return 
__half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hge2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data >= + static_cast<__half2_raw>(y).data; + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hlt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data < + static_cast<__half2_raw>(y).data; + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hgt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data > + static_cast<__half2_raw>(y).data; + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); } + inline + __device__ + __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); } + inline + __device__ + __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); } + inline + __device__ + __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); } + inline + __device__ + __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); } + inline + __device__ + __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); } + + inline + __device__ + bool __hbeq2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__heq2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __device__ + bool __hbne2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hne2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __device__ + bool __hble2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hle2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __device__ + bool __hbge2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hge2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + 
__device__ + bool __hblt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hlt2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __device__ + bool __hbgt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hgt2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __device__ + bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); } + inline + __device__ + bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); } + inline + __device__ + bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); } + inline + __device__ + bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); } + inline + __device__ + bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); } + inline + __device__ + bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); } + + // Arithmetic + inline + __device__ + __half __clamp_01(__half x) + { + auto r = static_cast<__half_raw>(x); + + if (__hlt(x, __half_raw{0})) return __half_raw{0}; + if (__hlt(__half_raw{1}, x)) return __half_raw{1}; + return r; + } + + inline + __device__ + __half __hadd(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data + + static_cast<__half_raw>(y).data}; + } + inline + __device__ + __half __hsub(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data - + static_cast<__half_raw>(y).data}; + } + inline + __device__ + __half __hmul(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data * + static_cast<__half_raw>(y).data}; + } + inline + __device__ + __half __hadd_sat(__half x, __half y) + { + return __clamp_01(__hadd(x, y)); + } + inline + __device__ + __half __hsub_sat(__half x, __half y) + { + return __clamp_01(__hsub(x, y)); + } + inline + __device__ + __half __hmul_sat(__half x, __half y) + { + return __clamp_01(__hmul(x, y)); + } + inline + __device__ + __half __hfma(__half x, __half y, __half z) + { + return __half_raw{__ocml_fma_f16( + static_cast<__half_raw>(x).data, + 
static_cast<__half_raw>(y).data, + static_cast<__half_raw>(z).data)}; + } + inline + __device__ + __half __hfma_sat(__half x, __half y, __half z) + { + return __clamp_01(__hfma(x, y, z)); + } + inline + __device__ + __half __hdiv(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data / + static_cast<__half_raw>(y).data}; + } + + inline + __device__ + __half2 __hadd2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data + + static_cast<__half2_raw>(y).data}; + } + inline + __device__ + __half2 __hsub2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data - + static_cast<__half2_raw>(y).data}; + } + inline + __device__ + __half2 __hmul2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data * + static_cast<__half2_raw>(y).data}; + } + inline + __device__ + __half2 __hadd2_sat(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hadd2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __device__ + __half2 __hsub2_sat(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hsub2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __device__ + __half2 __hmul2_sat(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hmul2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __device__ + __half2 __hfma2(__half2 x, __half2 y, __half2 z) + { + return __half2_raw{__ocml_fma_2f16(x, y, z)}; + } + inline + __device__ + __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z) + { + auto r = static_cast<__half2_raw>(__hfma2(x, y, z)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __device__ + __half2 __h2div(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data / + 
static_cast<__half2_raw>(y).data}; + } + + // Math functions + #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ + inline + __device__ + float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) { + return __ockl_fdot2(static_cast<__half2_raw>(a).data, + static_cast<__half2_raw>(b).data, + c, saturate); + } + #endif + inline + __device__ + __half htrunc(__half x) + { + return __half_raw{ + __ocml_trunc_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hceil(__half x) + { + return __half_raw{ + __ocml_ceil_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hfloor(__half x) + { + return __half_raw{ + __ocml_floor_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hrint(__half x) + { + return __half_raw{ + __ocml_rint_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hsin(__half x) + { + return __half_raw{ + __ocml_sin_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hcos(__half x) + { + return __half_raw{ + __ocml_cos_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp(__half x) + { + return __half_raw{ + __ocml_exp_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp2(__half x) + { + return __half_raw{ + __ocml_exp2_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp10(__half x) + { + return __half_raw{ + __ocml_exp10_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog2(__half x) + { + return __half_raw{ + __ocml_log2_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog(__half x) + { + return __half_raw{ + __ocml_log_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog10(__half x) + { + return __half_raw{ + __ocml_log10_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hrcp(__half x) + { + return __half_raw{ + 
__llvm_amdgcn_rcp_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hrsqrt(__half x) + { + return __half_raw{ + __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hsqrt(__half x) + { + return __half_raw{ + __ocml_sqrt_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + bool __hisinf(__half x) + { + return __ocml_isinf_f16(static_cast<__half_raw>(x).data); + } + inline + __device__ + bool __hisnan(__half x) + { + return __ocml_isnan_f16(static_cast<__half_raw>(x).data); + } + inline + __device__ + __half __hneg(__half x) + { + return __half_raw{-static_cast<__half_raw>(x).data}; + } + + inline + __device__ + __half2 h2trunc(__half2 x) + { + return __half2_raw{__ocml_trunc_2f16(x)}; + } + inline + __device__ + __half2 h2ceil(__half2 x) + { + return __half2_raw{__ocml_ceil_2f16(x)}; + } + inline + __device__ + __half2 h2floor(__half2 x) + { + return __half2_raw{__ocml_floor_2f16(x)}; + } + inline + __device__ + __half2 h2rint(__half2 x) + { + return __half2_raw{__ocml_rint_2f16(x)}; + } + inline + __device__ + __half2 h2sin(__half2 x) + { + return __half2_raw{__ocml_sin_2f16(x)}; + } + inline + __device__ + __half2 h2cos(__half2 x) + { + return __half2_raw{__ocml_cos_2f16(x)}; + } + inline + __device__ + __half2 h2exp(__half2 x) + { + return __half2_raw{__ocml_exp_2f16(x)}; + } + inline + __device__ + __half2 h2exp2(__half2 x) + { + return __half2_raw{__ocml_exp2_2f16(x)}; + } + inline + __device__ + __half2 h2exp10(__half2 x) + { + return __half2_raw{__ocml_exp10_2f16(x)}; + } + inline + __device__ + __half2 h2log2(__half2 x) + { + return __half2_raw{__ocml_log2_2f16(x)}; + } + inline + __device__ + __half2 h2log(__half2 x) { return __ocml_log_2f16(x); } + inline + __device__ + __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); } + inline + __device__ + __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); } + inline + __device__ + __half2 h2rsqrt(__half2 x) { return 
__ocml_rsqrt_2f16(x); } + inline + __device__ + __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); } + inline + __device__ + __half2 __hisinf2(__half2 x) + { + auto r = __ocml_isinf_2f16(x); + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hisnan2(__half2 x) + { + auto r = __ocml_isnan_2f16(x); + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __device__ + __half2 __hneg2(__half2 x) + { + return __half2_raw{-static_cast<__half2_raw>(x).data}; + } + } // Anonymous namespace. + + #if !defined(HIP_NO_HALF) + using half = __half; + using half2 = __half2; + #endif + #endif // defined(__cplusplus) +#elif defined(__GNUC__) + #include "hip_fp16_gcc.h" +#endif // !defined(__clang__) && defined(__GNUC__) diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h new file mode 100644 index 000000000..9b31f9e3c --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h @@ -0,0 +1,257 @@ +#pragma once + +#if defined(__cplusplus) + #include +#endif + +struct __half_raw { + unsigned short x; +}; + +struct __half2_raw { + unsigned short x; + unsigned short y; +}; + +#if defined(__cplusplus) + struct __half; + + __half __float2half(float); + float __half2float(__half); + + // BEGIN STRUCT __HALF + struct __half { + protected: + unsigned short __x; + public: + // CREATORS + __half() = default; + __half(const __half_raw& x) : __x{x.x} {} + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + __half(float x) : __x{__float2half(x).__x} {} + __half(double x) : __x{__float2half(x).__x} {} + #endif + __half(const __half&) = default; + __half(__half&&) = default; + ~__half() = default; + + // MANIPULATORS + __half& operator=(const __half&) = default; + __half& operator=(__half&&) = default; + __half& operator=(const __half_raw& x) { __x = x.x; return *this; } + #if 
!defined(__HIP_NO_HALF_CONVERSIONS__) + __half& operator=(float x) + { + __x = __float2half(x).__x; + return *this; + } + __half& operator=(double x) + { + return *this = static_cast(x); + } + #endif + + // ACCESSORS + operator float() const { return __half2float(*this); } + operator __half_raw() const { return __half_raw{__x}; } + }; + // END STRUCT __HALF + + // BEGIN STRUCT __HALF2 + struct __half2 { + protected: + __half x; + __half y; + public: + // CREATORS + __half2() = default; + __half2(const __half2_raw& ix) + : + x{reinterpret_cast(ix.x)}, + y{reinterpret_cast(ix.y)} + {} + __half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {} + __half2(const __half2&) = default; + __half2(__half2&&) = default; + ~__half2() = default; + + // MANIPULATORS + __half2& operator=(const __half2&) = default; + __half2& operator=(__half2&&) = default; + __half2& operator=(const __half2_raw& ix) + { + x = reinterpret_cast(ix.x); + y = reinterpret_cast(ix.y); + return *this; + } + + // ACCESSORS + operator __half2_raw() const + { + return __half2_raw{ + reinterpret_cast(x), + reinterpret_cast(y)}; + } + }; + // END STRUCT __HALF2 + + namespace + { + inline + unsigned short __internal_float2half( + float flt, unsigned int& sgn, unsigned int& rem) + { + unsigned int x{}; + std::memcpy(&x, &flt, sizeof(flt)); + + unsigned int u = (x & 0x7fffffffU); + sgn = ((x >> 16) & 0x8000U); + + // NaN/+Inf/-Inf + if (u >= 0x7f800000U) { + rem = 0; + return static_cast( + (u == 0x7f800000U) ? 
(sgn | 0x7c00U) : 0x7fffU); + } + // Overflows + if (u > 0x477fefffU) { + rem = 0x80000000U; + return static_cast(sgn | 0x7bffU); + } + // Normal numbers + if (u >= 0x38800000U) { + rem = u << 19; + u -= 0x38000000U; + return static_cast(sgn | (u >> 13)); + } + // +0/-0 + if (u < 0x33000001U) { + rem = u; + return static_cast(sgn); + } + // Denormal numbers + unsigned int exponent = u >> 23; + unsigned int mantissa = (u & 0x7fffffU); + unsigned int shift = 0x7eU - exponent; + mantissa |= 0x800000U; + rem = mantissa << (32 - shift); + return static_cast(sgn | (mantissa >> shift)); + } + + inline + __half __float2half(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x; + + return r; + } + + inline + __half __float2half_rn(float x) { return __float2half(x); } + + inline + __half __float2half_rz(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + + return r; + } + + inline + __half __float2half_rd(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem && sgn) ++r.x; + + return r; + } + + inline + __half __float2half_ru(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem && !sgn) ++r.x; + + return r; + } + + inline + __half2 __float2half2_rn(float x) + { + return __half2{__float2half_rn(x), __float2half_rn(x)}; + } + + inline + __half2 __floats2half2_rn(float x, float y) + { + return __half2{__float2half_rn(x), __float2half_rn(y)}; + } + + inline + float __internal_half2float(unsigned short x) + { + unsigned int sign = ((x >> 15) & 1); + unsigned int exponent = ((x >> 10) & 0x1f); + unsigned int mantissa = ((x & 0x3ff) << 13); + + if (exponent == 0x1fU) { /* NaN or Inf */ + mantissa = (mantissa ? 
(sign = 0, 0x7fffffU) : 0); + exponent = 0xffU; + } else if (!exponent) { /* Denorm or Zero */ + if (mantissa) { + unsigned int msb; + exponent = 0x71U; + do { + msb = (mantissa & 0x400000U); + mantissa <<= 1; /* normalize */ + --exponent; + } while (!msb); + mantissa &= 0x7fffffU; /* 1.mantissa is implicit */ + } + } else { + exponent += 0x70U; + } + unsigned int u = ((sign << 31) | (exponent << 23) | mantissa); + float f; + memcpy(&f, &u, sizeof(u)); + + return f; + } + + inline + float __half2float(__half x) + { + return __internal_half2float(static_cast<__half_raw>(x).x); + } + + inline + float __low2float(__half2 x) + { + return __internal_half2float(static_cast<__half2_raw>(x).x); + } + + inline + float __high2float(__half2 x) + { + return __internal_half2float(static_cast<__half2_raw>(x).y); + } + } // Anonymous namespace. + + #if !defined(HIP_NO_HALF) + using half = __half; + using half2 = __half2; + #endif +#endif // defined(__cplusplus) diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h new file mode 100644 index 000000000..eeb617c40 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h @@ -0,0 +1,82 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +// /* +// Half Math Functions +// */ + +#include "host_defines.h" + +extern "C" +{ + __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); + __device__ _Float16 __ocml_cos_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); + __device__ __attribute__((const)) + _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); + __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); + __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); + __device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); + __device__ _Float16 __ocml_sin_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); + + typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); + typedef short __2i16 __attribute__((ext_vector_type(2))); + + #if (__hcc_workweek__ >= 
19015) || __HIP_CLANG_ONLY__ + __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); + #endif + + __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); + __device__ __2f16 __ocml_cos_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); + __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); + __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); + __device__ inline + __2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL. + { + return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)}; + } + __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); + __device__ __2f16 __ocml_sin_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); +} diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h b/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h new file mode 100644 index 000000000..a5b80b0a1 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h @@ -0,0 +1,103 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H + +#if defined(__HCC_OR_HIP_CLANG__) +#if __hcc_workweek__ >= 16164 || defined(__HIP_CLANG_ONLY__) +#include "hip_vector_types.h" +#include "host_defines.h" + +__device__ inline static char __ldg(const char* ptr) { return *ptr; } + +__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; } + +__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; } + +__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; } + +__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } + + +__device__ inline static short __ldg(const short* ptr) { return ptr[0]; } + +__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; } + +__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; } + +__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } + + +__device__ inline static int __ldg(const int* ptr) { return ptr[0]; } + +__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; } + +__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; } + +__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } + + +__device__ inline static long __ldg(const long* ptr) { return ptr[0]; } + +__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } + + +__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; } + +__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } + +__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } + + +__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } + +__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } + + +__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } + + 
+__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; } + +__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; } + + +__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } + + +__device__ inline static float __ldg(const float* ptr) { return ptr[0]; } + +__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; } + +__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; } + + +__device__ inline static double __ldg(const double* ptr) { return ptr[0]; } + +__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; } + +#endif // __hcc_workweek__ || defined(__HIP_CLANG_ONLY__) + +#endif // defined(__HCC_OR_HIP_CLANG__) + +#endif // HIP_LDG_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_memory.h b/src/utils/amd_hip/hip/hcc_detail/hip_memory.h new file mode 100644 index 000000000..866b9e879 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_memory.h @@ -0,0 +1,114 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H + +// Implementation of malloc and free device functions. +// HIP heap is implemented as a global array with fixed size. Users may define +// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. + +#if __HCC__ || __HIP__ + +// Size of page in bytes. +#ifndef __HIP_SIZE_OF_PAGE +#define __HIP_SIZE_OF_PAGE 64 +#endif + +// Total number of pages +#ifndef __HIP_NUM_PAGES +#define __HIP_NUM_PAGES (16 * 64 * 64) +#endif + +#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) + +#if __HIP__ && __HIP_DEVICE_COMPILE__ +__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; +__attribute__((weak)) __device__ + uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; +#else +extern __device__ char __hip_device_heap[]; +extern __device__ uint32_t __hip_device_page_flag[]; +#endif + +extern "C" inline __device__ void* __hip_malloc(size_t size) { + char* heap = (char*)__hip_device_heap; + if (size > __HIP_SIZE_OF_HEAP) { + return (void*)nullptr; + } + uint32_t totalThreads = + hipBlockDim_x * hipGridDim_x * hipBlockDim_y + * hipGridDim_y * hipBlockDim_z * hipGridDim_z; + uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x + + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x + + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x + * hipBlockDim_y; + + uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads; + uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads; + + uint32_t stride = size / __HIP_SIZE_OF_PAGE; + uint32_t start = numHeapsPerWorkItem * currentWorkItem; + + uint32_t k = 0; + + while 
(__hip_device_page_flag[k] > 0) { + k++; + } + + for (uint32_t i = 0; i < stride - 1; i++) { + __hip_device_page_flag[i + start + k] = 1; + } + + __hip_device_page_flag[start + stride - 1 + k] = 2; + + void* ptr = (void*)(heap + + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE); + + return ptr; +} + +extern "C" inline __device__ void* __hip_free(void* ptr) { + if (ptr == nullptr) { + return nullptr; + } + + uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap; + uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE; + + while (__hip_device_page_flag[offsetPage] != 0) { + if (__hip_device_page_flag[offsetPage] == 2) { + __hip_device_page_flag[offsetPage] = 0; + offsetPage++; + break; + } else { + __hip_device_page_flag[offsetPage] = 0; + offsetPage++; + } + } + + return nullptr; +} + +#endif + +#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h b/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h new file mode 100644 index 000000000..eb3112bdb --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h @@ -0,0 +1,204 @@ +// automatically generated sources +#ifndef _HIP_PROF_API_H +#define _HIP_PROF_API_H + +#include +#include +#include + +#include "hip/hcc_detail/hip_prof_str.h" + +template +class api_callbacks_table_templ { + public: + typedef std::recursive_mutex mutex_t; + + typedef Record record_t; + typedef Fun fun_t; + typedef Act act_t; + + // HIP API callbacks table + struct hip_cb_table_entry_t { + volatile std::atomic sync; + volatile std::atomic sem; + act_t act; + void* a_arg; + fun_t fun; + void* arg; + }; + + struct hip_cb_table_t { + hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; + }; + + api_callbacks_table_templ() { + memset(&callbacks_table_, 0, sizeof(callbacks_table_)); + } + + bool set_activity(uint32_t id, act_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + if (id == HIP_API_ID_ANY) { + for (unsigned i = 0; i < HIP_API_ID_NUMBER; 
++i) set_activity(i, fun, arg); + } else if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].act = fun; + callbacks_table_.arr[id].a_arg = arg; + cb_release(id); + } else { + ret = false; + } + return ret; + } + + bool set_callback(uint32_t id, fun_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + if (id == HIP_API_ID_ANY) { + for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_callback(i, fun, arg); + } else if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].fun = fun; + callbacks_table_.arr[id].arg = arg; + cb_release(id); + } else { + ret = false; + } + return ret; + } + + inline hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table_.arr[id]; + } + + inline void sem_sync(const uint32_t& id) { + sem_increment(id); + if (entry(id).sync.load() == true) sync_wait(id); + } + + inline void sem_release(const uint32_t& id) { + sem_decrement(id); + } + + private: + inline void cb_sync(const uint32_t& id) { + entry(id).sync.store(true); + while (entry(id).sem.load() != 0) {} + } + + inline void cb_release(const uint32_t& id) { + entry(id).sync.store(false); + } + + inline void sem_increment(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_add(1); + if (prev == UINT32_MAX) { + std::cerr << "sem overflow id = " << id << std::endl << std::flush; + abort(); + } + } + + inline void sem_decrement(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_sub(1); + if (prev == 0) { + std::cerr << "sem corrupted id = " << id << std::endl << std::flush; + abort(); + } + } + + void sync_wait(const uint32_t& id) { + sem_decrement(id); + while (entry(id).sync.load() == true) {} + sem_increment(id); + } + + mutex_t mutex_; + hip_cb_table_t callbacks_table_; +}; + + +#if USE_PROF_API +#include + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef 
activity_sync_callback_t hip_act_callback_t; + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + hip_api_data_t api_data{}; \ + INIT_CB_ARGS_DATA(CB_ID, api_data); \ + api_callbacks_spawner_t __api_tracer(HIP_API_ID_##CB_ID, api_data); + +typedef api_callbacks_table_templ api_callbacks_table_t; +extern api_callbacks_table_t callbacks_table; + +template +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) : + api_data_(api_data), + record_({}) + { + if (cid_ >= HIP_API_ID_NUMBER) { + fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); + abort(); + } + callbacks_table.sem_sync(cid_); + + act = entry(cid_).act; + a_arg = entry(cid_).a_arg; + fun = entry(cid_).fun; + arg = entry(cid_).arg; + + api_data_.phase = 0; + if (act != NULL) act(cid_, &record_, &api_data_, a_arg); + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg); + } + + ~api_callbacks_spawner_t() { + api_data_.phase = 1; + if (act != NULL) act(cid_, &record_, &api_data_, a_arg); + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg); + + callbacks_table.sem_release(cid_); + } + + private: + inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table.entry(id); + } + + hip_api_data_t& api_data_; + hip_api_record_t record_; + + hip_act_callback_t act; + void* a_arg; + hip_api_callback_t fun; + void* arg; +}; + +template <> +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) {} +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) + +class api_callbacks_table_t { + public: + typedef void* act_t; + typedef void* fun_t; + bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } + bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } +}; + +#endif + +#endif // _HIP_PROF_API_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h 
b/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h new file mode 100644 index 000000000..3a0d3e08e --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h @@ -0,0 +1,2512 @@ +// automatically generated sources +#ifndef _HIP_PROF_STR_H +#define _HIP_PROF_STR_H +#include +#include + +// Dummy API callbacks definition +#define INIT_NONE_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipHccGetAccelerator_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipHccGetAcceleratorView_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipDeviceCanAccessPeer2_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipMemcpyPeer2_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipMemcpyPeerAsync2_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipMemcpyHtoH_CB_ARGS_DATA(cb_data) {}; 
+#define INIT_hipGetErrorName_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) {}; +#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) {}; + +// HIP API callbacks ID enumaration +enum hip_api_id_t { + HIP_API_ID_hipHostFree = 0, + HIP_API_ID_hipMemcpyToSymbolAsync = 1, + HIP_API_ID_hipMallocPitch = 2, + HIP_API_ID_hipMalloc = 3, + HIP_API_ID_hipDeviceGetName = 4, + HIP_API_ID_hipEventRecord = 5, + HIP_API_ID_hipCtxSynchronize = 6, + HIP_API_ID_hipSetDevice = 7, + HIP_API_ID_hipSetupArgument = 8, + HIP_API_ID_hipMemcpyFromSymbolAsync = 9, + HIP_API_ID_hipMemcpyDtoD = 10, + HIP_API_ID_hipMemcpy2DToArray = 11, + HIP_API_ID_hipCtxGetCacheConfig = 12, + HIP_API_ID_hipStreamWaitEvent = 13, + HIP_API_ID_hipModuleLoad = 14, + HIP_API_ID_hipDevicePrimaryCtxSetFlags = 15, + HIP_API_ID_hipMemcpyAsync = 16, + HIP_API_ID_hipMalloc3DArray = 17, + HIP_API_ID_hipStreamCreate = 18, + HIP_API_ID_hipCtxGetCurrent = 19, + HIP_API_ID_hipDevicePrimaryCtxGetState = 20, + HIP_API_ID_hipEventQuery = 21, + HIP_API_ID_hipEventCreate = 22, + HIP_API_ID_hipMemGetAddressRange = 23, + HIP_API_ID_hipMemcpyFromSymbol = 24, + HIP_API_ID_hipArrayCreate = 25, + HIP_API_ID_hipStreamGetFlags = 26, + HIP_API_ID_hipMallocArray = 27, + HIP_API_ID_hipCtxGetSharedMemConfig = 28, + HIP_API_ID_hipMemPtrGetInfo = 29, + HIP_API_ID_hipCtxGetFlags = 30, + HIP_API_ID_hipStreamDestroy = 31, + HIP_API_ID_hipMemset3DAsync = 32, + HIP_API_ID_hipMemcpy3D = 33, + HIP_API_ID_hipInit = 34, + HIP_API_ID_hipMemcpyAtoH = 35, + HIP_API_ID_hipMemset2D = 36, + HIP_API_ID_hipMemset2DAsync = 37, + 
HIP_API_ID_hipDeviceCanAccessPeer = 38, + HIP_API_ID_hipDeviceEnablePeerAccess = 39, + HIP_API_ID_hipModuleUnload = 40, + HIP_API_ID_hipHostUnregister = 41, + HIP_API_ID_hipProfilerStop = 42, + HIP_API_ID_hipLaunchByPtr = 43, + HIP_API_ID_hipStreamSynchronize = 44, + HIP_API_ID_hipFreeHost = 45, + HIP_API_ID_hipRemoveApiCallback = 46, + HIP_API_ID_hipDeviceSetCacheConfig = 47, + HIP_API_ID_hipCtxGetApiVersion = 48, + HIP_API_ID_hipMemcpyHtoD = 49, + HIP_API_ID_hipModuleGetGlobal = 50, + HIP_API_ID_hipMemcpyHtoA = 51, + HIP_API_ID_hipCtxCreate = 52, + HIP_API_ID_hipMemcpy2D = 53, + HIP_API_ID_hipIpcCloseMemHandle = 54, + HIP_API_ID_hipChooseDevice = 55, + HIP_API_ID_hipDeviceSetSharedMemConfig = 56, + HIP_API_ID_hipDeviceComputeCapability = 57, + HIP_API_ID_hipRegisterApiCallback = 58, + HIP_API_ID_hipDeviceGet = 59, + HIP_API_ID_hipProfilerStart = 60, + HIP_API_ID_hipCtxSetCacheConfig = 61, + HIP_API_ID_hipFuncSetCacheConfig = 62, + HIP_API_ID_hipMemcpyPeerAsync = 63, + HIP_API_ID_hipEventElapsedTime = 64, + HIP_API_ID_hipDevicePrimaryCtxReset = 65, + HIP_API_ID_hipEventDestroy = 66, + HIP_API_ID_hipCtxPopCurrent = 67, + HIP_API_ID_hipHostGetFlags = 68, + HIP_API_ID_hipHostMalloc = 69, + HIP_API_ID_hipDriverGetVersion = 70, + HIP_API_ID_hipMemGetInfo = 71, + HIP_API_ID_hipDeviceReset = 72, + HIP_API_ID_hipMemset = 73, + HIP_API_ID_hipMemsetD8 = 74, + HIP_API_ID_hipHostRegister = 75, + HIP_API_ID_hipCtxSetSharedMemConfig = 76, + HIP_API_ID_hipArray3DCreate = 77, + HIP_API_ID_hipIpcOpenMemHandle = 78, + HIP_API_ID_hipGetLastError = 79, + HIP_API_ID_hipCtxDestroy = 80, + HIP_API_ID_hipDeviceGetSharedMemConfig = 81, + HIP_API_ID_hipRegisterActivityCallback = 82, + HIP_API_ID_hipSetDeviceFlags = 83, + HIP_API_ID_hipFree = 84, + HIP_API_ID_hipDeviceGetAttribute = 85, + HIP_API_ID_hipMemcpyDtoH = 86, + HIP_API_ID_hipCtxDisablePeerAccess = 87, + HIP_API_ID_hipDeviceGetByPCIBusId = 88, + HIP_API_ID_hipIpcGetMemHandle = 89, + HIP_API_ID_hipMemcpyHtoDAsync = 90, + 
HIP_API_ID_hipCtxGetDevice = 91, + HIP_API_ID_hipMemset3D = 92, + HIP_API_ID_hipModuleLoadData = 93, + HIP_API_ID_hipDeviceTotalMem = 94, + HIP_API_ID_hipCtxSetCurrent = 95, + HIP_API_ID_hipMallocHost = 96, + HIP_API_ID_hipDevicePrimaryCtxRetain = 97, + HIP_API_ID_hipDeviceDisablePeerAccess = 98, + HIP_API_ID_hipStreamCreateWithFlags = 99, + HIP_API_ID_hipMemcpyFromArray = 100, + HIP_API_ID_hipMemcpy2DAsync = 101, + HIP_API_ID_hipFuncGetAttributes = 102, + HIP_API_ID_hipEventCreateWithFlags = 103, + HIP_API_ID_hipStreamQuery = 104, + HIP_API_ID_hipDeviceGetPCIBusId = 105, + HIP_API_ID_hipMemcpy = 106, + HIP_API_ID_hipPeekAtLastError = 107, + HIP_API_ID_hipHostAlloc = 108, + HIP_API_ID_hipStreamAddCallback = 109, + HIP_API_ID_hipMemcpyToArray = 110, + HIP_API_ID_hipDeviceSynchronize = 111, + HIP_API_ID_hipDeviceGetCacheConfig = 112, + HIP_API_ID_hipMalloc3D = 113, + HIP_API_ID_hipPointerGetAttributes = 114, + HIP_API_ID_hipMemsetAsync = 115, + HIP_API_ID_hipMemcpyToSymbol = 116, + HIP_API_ID_hipCtxPushCurrent = 117, + HIP_API_ID_hipMemcpyPeer = 118, + HIP_API_ID_hipEventSynchronize = 119, + HIP_API_ID_hipMemcpyDtoDAsync = 120, + HIP_API_ID_hipCtxEnablePeerAccess = 121, + HIP_API_ID_hipMemcpyDtoHAsync = 122, + HIP_API_ID_hipModuleLaunchKernel = 123, + HIP_API_ID_hipModuleGetTexRef = 124, + HIP_API_ID_hipRemoveActivityCallback = 125, + HIP_API_ID_hipDeviceGetLimit = 126, + HIP_API_ID_hipModuleLoadDataEx = 127, + HIP_API_ID_hipRuntimeGetVersion = 128, + HIP_API_ID_hipGetDeviceProperties = 129, + HIP_API_ID_hipFreeArray = 130, + HIP_API_ID_hipDevicePrimaryCtxRelease = 131, + HIP_API_ID_hipHostGetDevicePointer = 132, + HIP_API_ID_hipMemcpyParam2D = 133, + HIP_API_ID_hipConfigureCall = 134, + HIP_API_ID_hipModuleGetFunction = 135, + HIP_API_ID_hipGetDevice = 136, + HIP_API_ID_hipGetDeviceCount = 137, + HIP_API_ID_hipHccModuleLaunchKernel = 138, + HIP_API_ID_NUMBER = 139, + HIP_API_ID_ANY = 140, + + HIP_API_ID_NONE = HIP_API_ID_NUMBER, + HIP_API_ID_hipHccGetAccelerator = 
HIP_API_ID_NUMBER, + HIP_API_ID_hipHccGetAcceleratorView = HIP_API_ID_NUMBER, + HIP_API_ID_hipDeviceCanAccessPeer2 = HIP_API_ID_NUMBER, + HIP_API_ID_hipMemcpyPeer2 = HIP_API_ID_NUMBER, + HIP_API_ID_hipMemcpyPeerAsync2 = HIP_API_ID_NUMBER, + HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NUMBER, + HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NUMBER, + HIP_API_ID_hipBindTexture = HIP_API_ID_NUMBER, + HIP_API_ID_hipBindTexture2D = HIP_API_ID_NUMBER, + HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NUMBER, + HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NUMBER, + HIP_API_ID_hipUnbindTexture = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetChannelDesc = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetTextureAlignmentOffset = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetTextureReference = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetFormat = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetFlags = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetArray = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetAddress = HIP_API_ID_NUMBER, + HIP_API_ID_hipTexRefSetAddress2D = HIP_API_ID_NUMBER, + HIP_API_ID_hipMemcpyHtoH = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetErrorName = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetErrorString = HIP_API_ID_NUMBER, + HIP_API_ID_hipCreateSurfaceObject = HIP_API_ID_NUMBER, + HIP_API_ID_hipDestroySurfaceObject = HIP_API_ID_NUMBER, + HIP_API_ID_hipStreamCreateWithPriority = HIP_API_ID_NUMBER, + HIP_API_ID_hipDeviceGetStreamPriorityRange = HIP_API_ID_NUMBER, + HIP_API_ID_hipStreamGetPriority = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetSymbolAddress = HIP_API_ID_NUMBER, + HIP_API_ID_hipGetSymbolSize = HIP_API_ID_NUMBER, +}; + +// Return HIP API string +static const char* 
hip_api_name(const uint32_t& id) { + switch(id) { + case HIP_API_ID_hipHostFree: return "hipHostFree"; + case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync"; + case HIP_API_ID_hipMallocPitch: return "hipMallocPitch"; + case HIP_API_ID_hipMalloc: return "hipMalloc"; + case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName"; + case HIP_API_ID_hipEventRecord: return "hipEventRecord"; + case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize"; + case HIP_API_ID_hipSetDevice: return "hipSetDevice"; + case HIP_API_ID_hipSetupArgument: return "hipSetupArgument"; + case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync"; + case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD"; + case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray"; + case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig"; + case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent"; + case HIP_API_ID_hipModuleLoad: return "hipModuleLoad"; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags"; + case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync"; + case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray"; + case HIP_API_ID_hipStreamCreate: return "hipStreamCreate"; + case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent"; + case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState"; + case HIP_API_ID_hipEventQuery: return "hipEventQuery"; + case HIP_API_ID_hipEventCreate: return "hipEventCreate"; + case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange"; + case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol"; + case HIP_API_ID_hipArrayCreate: return "hipArrayCreate"; + case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags"; + case HIP_API_ID_hipMallocArray: return "hipMallocArray"; + case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig"; + case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo"; + case 
HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags"; + case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy"; + case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync"; + case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D"; + case HIP_API_ID_hipInit: return "hipInit"; + case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH"; + case HIP_API_ID_hipMemset2D: return "hipMemset2D"; + case HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync"; + case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer"; + case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess"; + case HIP_API_ID_hipModuleUnload: return "hipModuleUnload"; + case HIP_API_ID_hipHostUnregister: return "hipHostUnregister"; + case HIP_API_ID_hipProfilerStop: return "hipProfilerStop"; + case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr"; + case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize"; + case HIP_API_ID_hipFreeHost: return "hipFreeHost"; + case HIP_API_ID_hipRemoveApiCallback: return "hipRemoveApiCallback"; + case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig"; + case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion"; + case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD"; + case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal"; + case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA"; + case HIP_API_ID_hipCtxCreate: return "hipCtxCreate"; + case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D"; + case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle"; + case HIP_API_ID_hipChooseDevice: return "hipChooseDevice"; + case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig"; + case HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability"; + case HIP_API_ID_hipRegisterApiCallback: return "hipRegisterApiCallback"; + case HIP_API_ID_hipDeviceGet: return "hipDeviceGet"; + case HIP_API_ID_hipProfilerStart: return "hipProfilerStart"; + case 
HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig"; + case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig"; + case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync"; + case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime"; + case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset"; + case HIP_API_ID_hipEventDestroy: return "hipEventDestroy"; + case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent"; + case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags"; + case HIP_API_ID_hipHostMalloc: return "hipHostMalloc"; + case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion"; + case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo"; + case HIP_API_ID_hipDeviceReset: return "hipDeviceReset"; + case HIP_API_ID_hipMemset: return "hipMemset"; + case HIP_API_ID_hipMemsetD8: return "hipMemsetD8"; + case HIP_API_ID_hipHostRegister: return "hipHostRegister"; + case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig"; + case HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate"; + case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle"; + case HIP_API_ID_hipGetLastError: return "hipGetLastError"; + case HIP_API_ID_hipCtxDestroy: return "hipCtxDestroy"; + case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig"; + case HIP_API_ID_hipRegisterActivityCallback: return "hipRegisterActivityCallback"; + case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags"; + case HIP_API_ID_hipFree: return "hipFree"; + case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute"; + case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH"; + case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess"; + case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId"; + case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle"; + case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync"; + case 
HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice"; + case HIP_API_ID_hipMemset3D: return "hipMemset3D"; + case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData"; + case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem"; + case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent"; + case HIP_API_ID_hipMallocHost: return "hipMallocHost"; + case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain"; + case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess"; + case HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags"; + case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray"; + case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync"; + case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes"; + case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags"; + case HIP_API_ID_hipStreamQuery: return "hipStreamQuery"; + case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId"; + case HIP_API_ID_hipMemcpy: return "hipMemcpy"; + case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError"; + case HIP_API_ID_hipHostAlloc: return "hipHostAlloc"; + case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback"; + case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray"; + case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize"; + case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig"; + case HIP_API_ID_hipMalloc3D: return "hipMalloc3D"; + case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes"; + case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync"; + case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol"; + case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent"; + case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer"; + case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; + case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync"; + case 
HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess"; + case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync"; + case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel"; + case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef"; + case HIP_API_ID_hipRemoveActivityCallback: return "hipRemoveActivityCallback"; + case HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit"; + case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx"; + case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion"; + case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties"; + case HIP_API_ID_hipFreeArray: return "hipFreeArray"; + case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease"; + case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer"; + case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D"; + case HIP_API_ID_hipConfigureCall: return "hipConfigureCall"; + case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction"; + case HIP_API_ID_hipGetDevice: return "hipGetDevice"; + case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount"; + }; + return "unknown"; +}; + +// HIP API callbacks data structure +struct hip_api_data_t { + uint64_t correlation_id; + uint32_t phase; + union { + struct { + void* ptr; + } hipHostFree; + struct { + const void* symbolName; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyToSymbolAsync; + struct { + void** ptr; + size_t* pitch; + size_t width; + size_t height; + } hipMallocPitch; + struct { + void** ptr; + size_t size; + } hipMalloc; + struct { + char* name; + int len; + hipDevice_t device; + } hipDeviceGetName; + struct { + hipEvent_t event; + hipStream_t stream; + } hipEventRecord; + struct { + int deviceId; + } hipSetDevice; + struct { + const void* arg; + size_t size; + size_t offset; + } hipSetupArgument; + struct { + void* dst; + const void* 
symbolName; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyFromSymbolAsync; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoD; + struct { + hipArray* dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DToArray; + struct { + hipFuncCache_t* cacheConfig; + } hipCtxGetCacheConfig; + struct { + hipStream_t stream; + hipEvent_t event; + unsigned int flags; + } hipStreamWaitEvent; + struct { + hipModule_t* module; + const char* fname; + } hipModuleLoad; + struct { + hipDevice_t dev; + unsigned int flags; + } hipDevicePrimaryCtxSetFlags; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyAsync; + struct { + hipArray** array; + const hipChannelFormatDesc* desc; + hipExtent extent; + unsigned int flags; + } hipMalloc3DArray; + struct { + hipStream_t* stream; + } hipStreamCreate; + struct { + hipCtx_t* ctx; + } hipCtxGetCurrent; + struct { + hipDevice_t dev; + unsigned int* flags; + int* active; + } hipDevicePrimaryCtxGetState; + struct { + hipEvent_t event; + } hipEventQuery; + struct { + hipEvent_t* event; + } hipEventCreate; + struct { + hipDeviceptr_t* pbase; + size_t* psize; + hipDeviceptr_t dptr; + } hipMemGetAddressRange; + struct { + void* dst; + const void* symbolName; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyFromSymbol; + struct { + hipArray** pHandle; + const HIP_ARRAY_DESCRIPTOR* pAllocateArray; + } hipArrayCreate; + struct { + hipStream_t stream; + unsigned int* flags; + } hipStreamGetFlags; + struct { + hipArray** array; + const hipChannelFormatDesc* desc; + size_t width; + size_t height; + unsigned int flags; + } hipMallocArray; + struct { + hipSharedMemConfig* pConfig; + } hipCtxGetSharedMemConfig; + struct { + void* ptr; + size_t* size; + } hipMemPtrGetInfo; + struct { + 
unsigned int* flags; + } hipCtxGetFlags; + struct { + hipStream_t stream; + } hipStreamDestroy; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + hipStream_t stream; + } hipMemset3DAsync; + struct { + const hipMemcpy3DParms* p; + } hipMemcpy3D; + struct { + unsigned int flags; + } hipInit; + struct { + void* dst; + hipArray* srcArray; + size_t srcOffset; + size_t count; + } hipMemcpyAtoH; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + } hipMemset2D; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemset2DAsync; + struct { + int* canAccessPeer; + int deviceId; + int peerDeviceId; + } hipDeviceCanAccessPeer; + struct { + int peerDeviceId; + unsigned int flags; + } hipDeviceEnablePeerAccess; + struct { + hipModule_t module; + } hipModuleUnload; + struct { + void* hostPtr; + } hipHostUnregister; + struct { + const void* func; + } hipLaunchByPtr; + struct { + hipStream_t stream; + } hipStreamSynchronize; + struct { + void* ptr; + } hipFreeHost; + struct { + uint32_t id; + } hipRemoveApiCallback; + struct { + hipFuncCache_t cacheConfig; + } hipDeviceSetCacheConfig; + struct { + hipCtx_t ctx; + int* apiVersion; + } hipCtxGetApiVersion; + struct { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + } hipMemcpyHtoD; + struct { + hipDeviceptr_t* dptr; + size_t* bytes; + hipModule_t hmod; + const char* name; + } hipModuleGetGlobal; + struct { + hipArray* dstArray; + size_t dstOffset; + const void* srcHost; + size_t count; + } hipMemcpyHtoA; + struct { + hipCtx_t* ctx; + unsigned int flags; + hipDevice_t device; + } hipCtxCreate; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2D; + struct { + void* devPtr; + } hipIpcCloseMemHandle; + struct { + int* device; + const hipDeviceProp_t* prop; + } hipChooseDevice; + struct { + hipSharedMemConfig 
config; + } hipDeviceSetSharedMemConfig; + struct { + int* major; + int* minor; + hipDevice_t device; + } hipDeviceComputeCapability; + struct { + uint32_t id; + void* fun; + void* arg; + } hipRegisterApiCallback; + struct { + hipDevice_t* device; + int ordinal; + } hipDeviceGet; + struct { + hipFuncCache_t cacheConfig; + } hipCtxSetCacheConfig; + struct { + const void* func; + hipFuncCache_t config; + } hipFuncSetCacheConfig; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDevice; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyPeerAsync; + struct { + float* ms; + hipEvent_t start; + hipEvent_t stop; + } hipEventElapsedTime; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxReset; + struct { + hipEvent_t event; + } hipEventDestroy; + struct { + hipCtx_t* ctx; + } hipCtxPopCurrent; + struct { + unsigned int* flagsPtr; + void* hostPtr; + } hipHostGetFlags; + struct { + void** ptr; + size_t size; + unsigned int flags; + } hipHostMalloc; + struct { + int* driverVersion; + } hipDriverGetVersion; + struct { + size_t* free; + size_t* total; + } hipMemGetInfo; + struct { + void* dst; + int value; + size_t sizeBytes; + } hipMemset; + struct { + hipDeviceptr_t dest; + unsigned char value; + size_t sizeBytes; + } hipMemsetD8; + struct { + void* hostPtr; + size_t sizeBytes; + unsigned int flags; + } hipHostRegister; + struct { + hipSharedMemConfig config; + } hipCtxSetSharedMemConfig; + struct { + hipArray** array; + const HIP_ARRAY_DESCRIPTOR* pAllocateArray; + } hipArray3DCreate; + struct { + void** devPtr; + hipIpcMemHandle_t handle; + unsigned int flags; + } hipIpcOpenMemHandle; + struct { + hipCtx_t ctx; + } hipCtxDestroy; + struct { + hipSharedMemConfig* pConfig; + } hipDeviceGetSharedMemConfig; + struct { + uint32_t id; + void* fun; + void* arg; + } hipRegisterActivityCallback; + struct { + unsigned flags; + } hipSetDeviceFlags; + struct { + void* ptr; + } hipFree; + struct { + int* pi; + hipDeviceAttribute_t attr; + int deviceId; + } 
hipDeviceGetAttribute; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoH; + struct { + hipCtx_t peerCtx; + } hipCtxDisablePeerAccess; + struct { + int* device; + const char* pciBusId; + } hipDeviceGetByPCIBusId; + struct { + hipIpcMemHandle_t* handle; + void* devPtr; + } hipIpcGetMemHandle; + struct { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyHtoDAsync; + struct { + hipDevice_t* device; + } hipCtxGetDevice; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + } hipMemset3D; + struct { + hipModule_t* module; + const void* image; + } hipModuleLoadData; + struct { + size_t* bytes; + hipDevice_t device; + } hipDeviceTotalMem; + struct { + hipCtx_t ctx; + } hipCtxSetCurrent; + struct { + void** ptr; + size_t size; + } hipMallocHost; + struct { + hipCtx_t* pctx; + hipDevice_t dev; + } hipDevicePrimaryCtxRetain; + struct { + int peerDeviceId; + } hipDeviceDisablePeerAccess; + struct { + hipStream_t* stream; + unsigned int flags; + } hipStreamCreateWithFlags; + struct { + void* dst; + hipArray_const_t srcArray; + size_t wOffset; + size_t hOffset; + size_t count; + hipMemcpyKind kind; + } hipMemcpyFromArray; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DAsync; + struct { + hipFuncAttributes* attr; + const void* func; + } hipFuncGetAttributes; + struct { + hipEvent_t* event; + unsigned flags; + } hipEventCreateWithFlags; + struct { + hipStream_t stream; + } hipStreamQuery; + struct { + char* pciBusId; + int len; + int device; + } hipDeviceGetPCIBusId; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + } hipMemcpy; + struct { + void** ptr; + size_t size; + unsigned int flags; + } hipHostAlloc; + struct { + hipStream_t stream; + hipStreamCallback_t callback; + void* userData; + unsigned int flags; + } 
hipStreamAddCallback; + struct { + hipArray* dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipMemcpyToArray; + struct { + hipFuncCache_t* cacheConfig; + } hipDeviceGetCacheConfig; + struct { + hipPitchedPtr* pitchedDevPtr; + hipExtent extent; + } hipMalloc3D; + struct { + hipPointerAttribute_t* attributes; + const void* ptr; + } hipPointerGetAttributes; + struct { + void* dst; + int value; + size_t sizeBytes; + hipStream_t stream; + } hipMemsetAsync; + struct { + const void* symbolName; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyToSymbol; + struct { + hipCtx_t ctx; + } hipCtxPushCurrent; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDeviceId; + size_t sizeBytes; + } hipMemcpyPeer; + struct { + hipEvent_t event; + } hipEventSynchronize; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoDAsync; + struct { + hipCtx_t peerCtx; + unsigned int flags; + } hipCtxEnablePeerAccess; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoHAsync; + struct { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void** extra; + } hipModuleLaunchKernel; + struct { + hipFunction_t f; + } hipHccModuleLaunchKernel; + struct { + textureReference** texRef; + hipModule_t hmod; + const char* name; + } hipModuleGetTexRef; + struct { + uint32_t id; + } hipRemoveActivityCallback; + struct { + size_t* pValue; + hipLimit_t limit; + } hipDeviceGetLimit; + struct { + hipModule_t* module; + const void* image; + unsigned int numOptions; + hipJitOption* options; + void** optionValues; + } hipModuleLoadDataEx; + struct { + int* runtimeVersion; + } hipRuntimeGetVersion; 
+ struct { + hipDeviceProp_t* prop; + int deviceId; + } hipGetDeviceProperties; + struct { + hipArray* array; + } hipFreeArray; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxRelease; + struct { + void** devPtr; + void* hstPtr; + unsigned int flags; + } hipHostGetDevicePointer; + struct { + const hip_Memcpy2D* pCopy; + } hipMemcpyParam2D; + struct { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } hipConfigureCall; + struct { + hipFunction_t* function; + hipModule_t module; + const char* kname; + } hipModuleGetFunction; + struct { + int* deviceId; + } hipGetDevice; + struct { + int* count; + } hipGetDeviceCount; + } args; +}; + +// HIP API callbacks args data filling macros +#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostFree.ptr = (void*)ptr; \ +}; +#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbolAsync.symbolName = (const void*)symbolName; \ + cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)count; \ + cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocPitch.ptr = (void**)ptr; \ + cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \ + cb_data.args.hipMallocPitch.width = (size_t)width; \ + cb_data.args.hipMallocPitch.height = (size_t)height; \ +}; +#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc.ptr = (void**)ptr; \ + cb_data.args.hipMalloc.size = (size_t)sizeBytes; \ +}; +#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetName.name = (char*)name; \ + cb_data.args.hipDeviceGetName.len = (int)len; \ + cb_data.args.hipDeviceGetName.device = (hipDevice_t)device; \ +}; +#define 
INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventRecord.event = (hipEvent_t)event; \ + cb_data.args.hipEventRecord.stream = (hipStream_t)stream; \ +}; +#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetDevice.deviceId = (int)deviceId; \ +}; +#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetupArgument.arg = (const void*)arg; \ + cb_data.args.hipSetupArgument.size = (size_t)size; \ + cb_data.args.hipSetupArgument.offset = (size_t)offset; \ +}; +#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbolAsync.symbolName = (const void*)symbolName; \ + cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)count; \ + cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \ + cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)src; \ + cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \ + cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \ + cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ 
+}; +#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \ + cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \ +}; +#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoad.fname = (const char*)fname; \ +}; +#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \ + cb_data.args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \ +}; +#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3DArray.array = (hipArray**)array; \ + cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \ + cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \ +}; +#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \ +}; +#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \ +}; +#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \ + cb_data.args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \ + cb_data.args.hipDevicePrimaryCtxGetState.active = (int*)active; \ +}; +#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventQuery.event = (hipEvent_t)event; \ +}; +#define 
INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventCreate.event = (hipEvent_t*)event; \ +}; +#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \ + cb_data.args.hipMemGetAddressRange.psize = (size_t*)psize; \ + cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \ +}; +#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbol.symbolName = (const void*)symbolName; \ + cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)count; \ + cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \ + cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ +}; +#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \ +}; +#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocArray.array = (hipArray**)array; \ + cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMallocArray.width = (size_t)width; \ + cb_data.args.hipMallocArray.height = (size_t)height; \ + cb_data.args.hipMallocArray.flags = (unsigned int)flags; \ +}; +#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ +}; +#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \ + cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \ +}; +#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetFlags.flags = (unsigned int*)flags; \ +}; +#define 
INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + cb_data.args.hipMemset3DAsync.value = (int)value; \ + cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \ + cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \ +}; +#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipInit.flags = (unsigned int)flags; \ +}; +#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoH.dst = (void*)dst; \ + cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \ + cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoH.count = (size_t)count; \ +}; +#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2D.dst = (void*)dst; \ + cb_data.args.hipMemset2D.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2D.value = (int)value; \ + cb_data.args.hipMemset2D.width = (size_t)width; \ + cb_data.args.hipMemset2D.height = (size_t)height; \ +}; +#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2DAsync.value = (int)value; \ + cb_data.args.hipMemset2DAsync.width = (size_t)width; \ + cb_data.args.hipMemset2DAsync.height = (size_t)height; \ + cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccessPeer; \ + cb_data.args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \ + cb_data.args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \ +}; +#define 
INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \ + cb_data.args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \ +}; +#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \ +}; +#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \ +}; +#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchByPtr.func = (const void*)hostFunction; \ +}; +#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \ +}; +#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeHost.ptr = (void*)ptr; \ +}; +#define INIT_hipRemoveApiCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRemoveApiCallback.id = (uint32_t)id; \ +}; +#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ +}; +#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \ + cb_data.args.hipCtxGetApiVersion.apiVersion = (int*)apiVersion; \ +}; +#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoD.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemcpyHtoD.src = (void*)src; \ + cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \ + cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetGlobal.name = (const char*)name; \ +}; +#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \ + 
cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoA.count = (size_t)count; \ +}; +#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \ + cb_data.args.hipCtxCreate.flags = (unsigned int)flags; \ + cb_data.args.hipCtxCreate.device = (hipDevice_t)device; \ +}; +#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2D.dst = (void*)dst; \ + cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2D.src = (const void*)src; \ + cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2D.width = (size_t)width; \ + cb_data.args.hipMemcpy2D.height = (size_t)height; \ + cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)devPtr; \ +}; +#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipChooseDevice.device = (int*)device; \ + cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)prop; \ +}; +#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceComputeCapability.major = (int*)major; \ + cb_data.args.hipDeviceComputeCapability.minor = (int*)minor; \ + cb_data.args.hipDeviceComputeCapability.device = (hipDevice_t)device; \ +}; +#define INIT_hipRegisterApiCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRegisterApiCallback.id = (uint32_t)id; \ + cb_data.args.hipRegisterApiCallback.fun = (void*)fun; \ + cb_data.args.hipRegisterApiCallback.arg = (void*)arg; \ +}; +#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGet.device = (hipDevice_t*)device; \ + cb_data.args.hipDeviceGet.ordinal = (int)deviceId; \ +}; 
+#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ +}; +#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \ + cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \ +}; +#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \ + cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyPeerAsync.srcDevice = (int)srcDevice; \ + cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventElapsedTime.ms = (float*)ms; \ + cb_data.args.hipEventElapsedTime.start = (hipEvent_t)start; \ + cb_data.args.hipEventElapsedTime.stop = (hipEvent_t)stop; \ +}; +#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \ +}; +#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventDestroy.event = (hipEvent_t)event; \ +}; +#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \ +}; +#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \ + cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \ +}; +#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostMalloc.ptr = (void**)ptr; \ + cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \ +}; +#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDriverGetVersion.driverVersion = 
(int*)driverVersion; \ +}; +#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetInfo.free = (size_t*)free; \ + cb_data.args.hipMemGetInfo.total = (size_t*)total; \ +}; +#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset.dst = (void*)dst; \ + cb_data.args.hipMemset.value = (int)value; \ + cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD8.value = (unsigned char)value; \ + cb_data.args.hipMemsetD8.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \ + cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipHostRegister.flags = (unsigned int)flags; \ +}; +#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArray3DCreate.array = (hipArray**)array; \ + cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ +}; +#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)devPtr; \ + cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \ + cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \ +}; +#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \ +}; +#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ +}; +#define INIT_hipRegisterActivityCallback_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipRegisterActivityCallback.id = (uint32_t)id; \ + cb_data.args.hipRegisterActivityCallback.fun = (void*)fun; \ + cb_data.args.hipRegisterActivityCallback.arg = (void*)arg; \ +}; +#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetDeviceFlags.flags = (unsigned)flags; \ +}; +#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFree.ptr = (void*)ptr; \ +}; +#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetAttribute.pi = (int*)pi; \ + cb_data.args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \ + cb_data.args.hipDeviceGetAttribute.deviceId = (int)device; \ +}; +#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoH.dst = (void*)dst; \ + cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)src; \ + cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ +}; +#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetByPCIBusId.device = (int*)device; \ + cb_data.args.hipDeviceGetByPCIBusId.pciBusId = (const char*)pciBusId; \ +}; +#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \ + cb_data.args.hipIpcGetMemHandle.devPtr = (void*)devPtr; \ +}; +#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemcpyHtoDAsync.src = (void*)src; \ + cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetDevice.device = (hipDevice_t*)device; \ +}; +#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + 
cb_data.args.hipMemset3D.value = (int)value; \ + cb_data.args.hipMemset3D.extent = (hipExtent)extent; \ +}; +#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadData.image = (const void*)image; \ +}; +#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceTotalMem.bytes = (size_t*)bytes; \ + cb_data.args.hipDeviceTotalMem.device = (hipDevice_t)device; \ +}; +#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \ +}; +#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocHost.ptr = (void**)ptr; \ + cb_data.args.hipMallocHost.size = (size_t)sizeBytes; \ +}; +#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \ + cb_data.args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \ +}; +#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \ +}; +#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \ + cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \ +}; +#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)srcArray; \ + cb_data.args.hipMemcpyFromArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyFromArray.count = (size_t)count; \ + cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \ 
+ cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \ + cb_data.args.hipFuncGetAttributes.func = (const void*)func; \ +}; +#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \ + cb_data.args.hipEventCreateWithFlags.flags = (unsigned)flags; \ +}; +#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \ +}; +#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \ + cb_data.args.hipDeviceGetPCIBusId.len = (int)len; \ + cb_data.args.hipDeviceGetPCIBusId.device = (int)device; \ +}; +#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy.dst = (void*)dst; \ + cb_data.args.hipMemcpy.src = (const void*)src; \ + cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostAlloc.ptr = (void**)ptr; \ + cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \ +}; +#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \ + cb_data.args.hipStreamAddCallback.userData = (void*)userData; \ + cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \ +}; +#define 
INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyToArray.src = (const void*)src; \ + cb_data.args.hipMemcpyToArray.count = (size_t)count; \ + cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \ +}; +#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ +}; +#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \ + cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \ +}; +#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \ + cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \ +}; +#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetAsync.dst = (void*)dst; \ + cb_data.args.hipMemsetAsync.value = (int)value; \ + cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbol.symbolName = (const void*)symbolName; \ + cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)count; \ + cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \ +}; +#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \ +}; +#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeer.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \ + 
cb_data.args.hipMemcpyPeer.src = (const void*)src; \ + cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \ + cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \ +}; +#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \ +}; +#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)src; \ + cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ + cb_data.args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \ +}; +#define INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)src; \ + cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \ +}; +#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \ + cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \ + cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \ + cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \ + cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipModuleLaunchKernel.extra = 
(void**)extra; \ +}; +#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ +}; +#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \ + cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetTexRef.name = (const char*)name; \ +}; +#define INIT_hipRemoveActivityCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRemoveActivityCallback.id = (uint32_t)id; \ +}; +#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetLimit.pValue = (size_t*)pValue; \ + cb_data.args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \ +}; +#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \ + cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \ + cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \ + cb_data.args.hipModuleLoadDataEx.optionValues = (void**)optionValues; \ +}; +#define INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \ +}; +#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceProperties.prop = (hipDeviceProp_t*)props; \ + cb_data.args.hipGetDeviceProperties.deviceId = (int)device; \ +}; +#define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeArray.array = (hipArray*)array; \ +}; +#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \ +}; +#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetDevicePointer.devPtr = (void**)devicePointer; \ + cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \ + cb_data.args.hipHostGetDevicePointer.flags = 
(unsigned int)flags; \ +}; +#define INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \ +}; +#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipConfigureCall.gridDim = (dim3)gridDim; \ + cb_data.args.hipConfigureCall.blockDim = (dim3)blockDim; \ + cb_data.args.hipConfigureCall.sharedMem = (size_t)sharedMem; \ + cb_data.args.hipConfigureCall.stream = (hipStream_t)stream; \ +}; +#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \ + cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetFunction.kname = (const char*)name; \ +}; +#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \ +}; +#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceCount.count = (int*)count; \ +}; +#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) + +#if 0 +// HIP API string method, method name and parameters +const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) { + std::ostringstream oss; + switch (id) { + case HIP_API_ID_hipHostFree: + oss << "hipHostFree(" + << " ptr=" << data->args.hipHostFree.ptr + << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbolAsync: + oss << "hipMemcpyToSymbolAsync(" + << " symbolName=" << data->args.hipMemcpyToSymbolAsync.symbolName << "," + << " src=" << data->args.hipMemcpyToSymbolAsync.src << "," + << " sizeBytes=" << data->args.hipMemcpyToSymbolAsync.sizeBytes << "," + << " offset=" << data->args.hipMemcpyToSymbolAsync.offset << "," + << " kind=" << data->args.hipMemcpyToSymbolAsync.kind << "," + << " stream=" << data->args.hipMemcpyToSymbolAsync.stream + << ")"; + break; + case HIP_API_ID_hipMallocPitch: + oss << "hipMallocPitch(" + << " ptr=" << data->args.hipMallocPitch.ptr << "," + << " pitch=" << 
data->args.hipMallocPitch.pitch << "," + << " width=" << data->args.hipMallocPitch.width << "," + << " height=" << data->args.hipMallocPitch.height + << ")"; + break; + case HIP_API_ID_hipMalloc: + oss << "hipMalloc(" + << " ptr=" << data->args.hipMalloc.ptr << "," + << " size=" << data->args.hipMalloc.size + << ")"; + break; + case HIP_API_ID_hipDeviceGetName: + oss << "hipDeviceGetName(" + << " name=" << data->args.hipDeviceGetName.name << "," + << " len=" << data->args.hipDeviceGetName.len << "," + << " device=" << data->args.hipDeviceGetName.device + << ")"; + break; + case HIP_API_ID_hipEventRecord: + oss << "hipEventRecord(" + << " event=" << data->args.hipEventRecord.event << "," + << " stream=" << data->args.hipEventRecord.stream + << ")"; + break; + case HIP_API_ID_hipCtxSynchronize: + oss << "hipCtxSynchronize(" + << ")"; + break; + case HIP_API_ID_hipSetDevice: + oss << "hipSetDevice(" + << " deviceId=" << data->args.hipSetDevice.deviceId + << ")"; + break; + case HIP_API_ID_hipSetupArgument: + oss << "hipSetupArgument(" + << " arg=" << data->args.hipSetupArgument.arg << "," + << " size=" << data->args.hipSetupArgument.size << "," + << " offset=" << data->args.hipSetupArgument.offset + << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbolAsync: + oss << "hipMemcpyFromSymbolAsync(" + << " dst=" << data->args.hipMemcpyFromSymbolAsync.dst << "," + << " symbolName=" << data->args.hipMemcpyFromSymbolAsync.symbolName << "," + << " sizeBytes=" << data->args.hipMemcpyFromSymbolAsync.sizeBytes << "," + << " offset=" << data->args.hipMemcpyFromSymbolAsync.offset << "," + << " kind=" << data->args.hipMemcpyFromSymbolAsync.kind << "," + << " stream=" << data->args.hipMemcpyFromSymbolAsync.stream + << ")"; + break; + case HIP_API_ID_hipMemcpyDtoD: + oss << "hipMemcpyDtoD(" + << " dst=" << data->args.hipMemcpyDtoD.dst << "," + << " src=" << data->args.hipMemcpyDtoD.src << "," + << " sizeBytes=" << data->args.hipMemcpyDtoD.sizeBytes + << ")"; + break; + case 
HIP_API_ID_hipMemcpy2DToArray: + oss << "hipMemcpy2DToArray(" + << " dst=" << data->args.hipMemcpy2DToArray.dst << "," + << " wOffset=" << data->args.hipMemcpy2DToArray.wOffset << "," + << " hOffset=" << data->args.hipMemcpy2DToArray.hOffset << "," + << " src=" << data->args.hipMemcpy2DToArray.src << "," + << " spitch=" << data->args.hipMemcpy2DToArray.spitch << "," + << " width=" << data->args.hipMemcpy2DToArray.width << "," + << " height=" << data->args.hipMemcpy2DToArray.height << "," + << " kind=" << data->args.hipMemcpy2DToArray.kind + << ")"; + break; + case HIP_API_ID_hipCtxGetCacheConfig: + oss << "hipCtxGetCacheConfig(" + << " cacheConfig=" << data->args.hipCtxGetCacheConfig.cacheConfig + << ")"; + break; + case HIP_API_ID_hipStreamWaitEvent: + oss << "hipStreamWaitEvent(" + << " stream=" << data->args.hipStreamWaitEvent.stream << "," + << " event=" << data->args.hipStreamWaitEvent.event << "," + << " flags=" << data->args.hipStreamWaitEvent.flags + << ")"; + break; + case HIP_API_ID_hipModuleLoad: + oss << "hipModuleLoad(" + << " module=" << data->args.hipModuleLoad.module << "," + << " fname=" << data->args.hipModuleLoad.fname + << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: + oss << "hipDevicePrimaryCtxSetFlags(" + << " dev=" << data->args.hipDevicePrimaryCtxSetFlags.dev << "," + << " flags=" << data->args.hipDevicePrimaryCtxSetFlags.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyAsync: + oss << "hipMemcpyAsync(" + << " dst=" << data->args.hipMemcpyAsync.dst << "," + << " src=" << data->args.hipMemcpyAsync.src << "," + << " sizeBytes=" << data->args.hipMemcpyAsync.sizeBytes << "," + << " kind=" << data->args.hipMemcpyAsync.kind << "," + << " stream=" << data->args.hipMemcpyAsync.stream + << ")"; + break; + case HIP_API_ID_hipMalloc3DArray: + oss << "hipMalloc3DArray(" + << " array=" << data->args.hipMalloc3DArray.array << "," + << " desc=" << data->args.hipMalloc3DArray.desc << "," + << " extent=" << 
data->args.hipMalloc3DArray.extent << "," + << " flags=" << data->args.hipMalloc3DArray.flags + << ")"; + break; + case HIP_API_ID_hipStreamCreate: + oss << "hipStreamCreate(" + << " stream=" << data->args.hipStreamCreate.stream + << ")"; + break; + case HIP_API_ID_hipCtxGetCurrent: + oss << "hipCtxGetCurrent(" + << " ctx=" << data->args.hipCtxGetCurrent.ctx + << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxGetState: + oss << "hipDevicePrimaryCtxGetState(" + << " dev=" << data->args.hipDevicePrimaryCtxGetState.dev << "," + << " flags=" << data->args.hipDevicePrimaryCtxGetState.flags << "," + << " active=" << data->args.hipDevicePrimaryCtxGetState.active + << ")"; + break; + case HIP_API_ID_hipEventQuery: + oss << "hipEventQuery(" + << " event=" << data->args.hipEventQuery.event + << ")"; + break; + case HIP_API_ID_hipEventCreate: + oss << "hipEventCreate(" + << " event=" << data->args.hipEventCreate.event + << ")"; + break; + case HIP_API_ID_hipMemGetAddressRange: + oss << "hipMemGetAddressRange(" + << " pbase=" << data->args.hipMemGetAddressRange.pbase << "," + << " psize=" << data->args.hipMemGetAddressRange.psize << "," + << " dptr=" << data->args.hipMemGetAddressRange.dptr + << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbol: + oss << "hipMemcpyFromSymbol(" + << " dst=" << data->args.hipMemcpyFromSymbol.dst << "," + << " symbolName=" << data->args.hipMemcpyFromSymbol.symbolName << "," + << " sizeBytes=" << data->args.hipMemcpyFromSymbol.sizeBytes << "," + << " offset=" << data->args.hipMemcpyFromSymbol.offset << "," + << " kind=" << data->args.hipMemcpyFromSymbol.kind + << ")"; + break; + case HIP_API_ID_hipArrayCreate: + oss << "hipArrayCreate(" + << " pHandle=" << data->args.hipArrayCreate.pHandle << "," + << " pAllocateArray=" << data->args.hipArrayCreate.pAllocateArray + << ")"; + break; + case HIP_API_ID_hipStreamGetFlags: + oss << "hipStreamGetFlags(" + << " stream=" << data->args.hipStreamGetFlags.stream << "," + << " flags=" << 
data->args.hipStreamGetFlags.flags + << ")"; + break; + case HIP_API_ID_hipMallocArray: + oss << "hipMallocArray(" + << " array=" << data->args.hipMallocArray.array << "," + << " desc=" << data->args.hipMallocArray.desc << "," + << " width=" << data->args.hipMallocArray.width << "," + << " height=" << data->args.hipMallocArray.height << "," + << " flags=" << data->args.hipMallocArray.flags + << ")"; + break; + case HIP_API_ID_hipCtxGetSharedMemConfig: + oss << "hipCtxGetSharedMemConfig(" + << " pConfig=" << data->args.hipCtxGetSharedMemConfig.pConfig + << ")"; + break; + case HIP_API_ID_hipMemPtrGetInfo: + oss << "hipMemPtrGetInfo(" + << " ptr=" << data->args.hipMemPtrGetInfo.ptr << "," + << " size=" << data->args.hipMemPtrGetInfo.size + << ")"; + break; + case HIP_API_ID_hipCtxGetFlags: + oss << "hipCtxGetFlags(" + << " flags=" << data->args.hipCtxGetFlags.flags + << ")"; + break; + case HIP_API_ID_hipStreamDestroy: + oss << "hipStreamDestroy(" + << " stream=" << data->args.hipStreamDestroy.stream + << ")"; + break; + case HIP_API_ID_hipMemset3DAsync: + oss << "hipMemset3DAsync(" + << " pitchedDevPtr=" << data->args.hipMemset3DAsync.pitchedDevPtr << "," + << " value=" << data->args.hipMemset3DAsync.value << "," + << " extent=" << data->args.hipMemset3DAsync.extent << "," + << " stream=" << data->args.hipMemset3DAsync.stream + << ")"; + break; + case HIP_API_ID_hipMemcpy3D: + oss << "hipMemcpy3D(" + << " p=" << data->args.hipMemcpy3D.p + << ")"; + break; + case HIP_API_ID_hipInit: + oss << "hipInit(" + << " flags=" << data->args.hipInit.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyAtoH: + oss << "hipMemcpyAtoH(" + << " dst=" << data->args.hipMemcpyAtoH.dst << "," + << " srcArray=" << data->args.hipMemcpyAtoH.srcArray << "," + << " srcOffset=" << data->args.hipMemcpyAtoH.srcOffset << "," + << " count=" << data->args.hipMemcpyAtoH.count + << ")"; + break; + case HIP_API_ID_hipMemset2D: + oss << "hipMemset2D(" + << " dst=" << data->args.hipMemset2D.dst << "," + 
<< " pitch=" << data->args.hipMemset2D.pitch << "," + << " value=" << data->args.hipMemset2D.value << "," + << " width=" << data->args.hipMemset2D.width << "," + << " height=" << data->args.hipMemset2D.height + << ")"; + break; + case HIP_API_ID_hipMemset2DAsync: + oss << "hipMemset2DAsync(" + << " dst=" << data->args.hipMemset2DAsync.dst << "," + << " pitch=" << data->args.hipMemset2DAsync.pitch << "," + << " value=" << data->args.hipMemset2DAsync.value << "," + << " width=" << data->args.hipMemset2DAsync.width << "," + << " height=" << data->args.hipMemset2DAsync.height << "," + << " stream=" << data->args.hipMemset2DAsync.stream + << ")"; + break; + case HIP_API_ID_hipDeviceCanAccessPeer: + oss << "hipDeviceCanAccessPeer(" + << " canAccessPeer=" << data->args.hipDeviceCanAccessPeer.canAccessPeer << "," + << " deviceId=" << data->args.hipDeviceCanAccessPeer.deviceId << "," + << " peerDeviceId=" << data->args.hipDeviceCanAccessPeer.peerDeviceId + << ")"; + break; + case HIP_API_ID_hipDeviceEnablePeerAccess: + oss << "hipDeviceEnablePeerAccess(" + << " peerDeviceId=" << data->args.hipDeviceEnablePeerAccess.peerDeviceId << "," + << " flags=" << data->args.hipDeviceEnablePeerAccess.flags + << ")"; + break; + case HIP_API_ID_hipModuleUnload: + oss << "hipModuleUnload(" + << " module=" << data->args.hipModuleUnload.module + << ")"; + break; + case HIP_API_ID_hipHostUnregister: + oss << "hipHostUnregister(" + << " hostPtr=" << data->args.hipHostUnregister.hostPtr + << ")"; + break; + case HIP_API_ID_hipProfilerStop: + oss << "hipProfilerStop(" + << ")"; + break; + case HIP_API_ID_hipLaunchByPtr: + oss << "hipLaunchByPtr(" + << " func=" << data->args.hipLaunchByPtr.func + << ")"; + break; + case HIP_API_ID_hipStreamSynchronize: + oss << "hipStreamSynchronize(" + << " stream=" << data->args.hipStreamSynchronize.stream + << ")"; + break; + case HIP_API_ID_hipFreeHost: + oss << "hipFreeHost(" + << " ptr=" << data->args.hipFreeHost.ptr + << ")"; + break; + case 
HIP_API_ID_hipRemoveApiCallback: + oss << "hipRemoveApiCallback(" + << " id=" << data->args.hipRemoveApiCallback.id + << ")"; + break; + case HIP_API_ID_hipDeviceSetCacheConfig: + oss << "hipDeviceSetCacheConfig(" + << " cacheConfig=" << data->args.hipDeviceSetCacheConfig.cacheConfig + << ")"; + break; + case HIP_API_ID_hipCtxGetApiVersion: + oss << "hipCtxGetApiVersion(" + << " ctx=" << data->args.hipCtxGetApiVersion.ctx << "," + << " apiVersion=" << data->args.hipCtxGetApiVersion.apiVersion + << ")"; + break; + case HIP_API_ID_hipMemcpyHtoD: + oss << "hipMemcpyHtoD(" + << " dst=" << data->args.hipMemcpyHtoD.dst << "," + << " src=" << data->args.hipMemcpyHtoD.src << "," + << " sizeBytes=" << data->args.hipMemcpyHtoD.sizeBytes + << ")"; + break; + case HIP_API_ID_hipModuleGetGlobal: + oss << "hipModuleGetGlobal(" + << " dptr=" << data->args.hipModuleGetGlobal.dptr << "," + << " bytes=" << data->args.hipModuleGetGlobal.bytes << "," + << " hmod=" << data->args.hipModuleGetGlobal.hmod << "," + << " name=" << data->args.hipModuleGetGlobal.name + << ")"; + break; + case HIP_API_ID_hipMemcpyHtoA: + oss << "hipMemcpyHtoA(" + << " dstArray=" << data->args.hipMemcpyHtoA.dstArray << "," + << " dstOffset=" << data->args.hipMemcpyHtoA.dstOffset << "," + << " srcHost=" << data->args.hipMemcpyHtoA.srcHost << "," + << " count=" << data->args.hipMemcpyHtoA.count + << ")"; + break; + case HIP_API_ID_hipCtxCreate: + oss << "hipCtxCreate(" + << " ctx=" << data->args.hipCtxCreate.ctx << "," + << " flags=" << data->args.hipCtxCreate.flags << "," + << " device=" << data->args.hipCtxCreate.device + << ")"; + break; + case HIP_API_ID_hipMemcpy2D: + oss << "hipMemcpy2D(" + << " dst=" << data->args.hipMemcpy2D.dst << "," + << " dpitch=" << data->args.hipMemcpy2D.dpitch << "," + << " src=" << data->args.hipMemcpy2D.src << "," + << " spitch=" << data->args.hipMemcpy2D.spitch << "," + << " width=" << data->args.hipMemcpy2D.width << "," + << " height=" << data->args.hipMemcpy2D.height << "," + 
<< " kind=" << data->args.hipMemcpy2D.kind + << ")"; + break; + case HIP_API_ID_hipIpcCloseMemHandle: + oss << "hipIpcCloseMemHandle(" + << " devPtr=" << data->args.hipIpcCloseMemHandle.devPtr + << ")"; + break; + case HIP_API_ID_hipChooseDevice: + oss << "hipChooseDevice(" + << " device=" << data->args.hipChooseDevice.device << "," + << " prop=" << data->args.hipChooseDevice.prop + << ")"; + break; + case HIP_API_ID_hipDeviceSetSharedMemConfig: + oss << "hipDeviceSetSharedMemConfig(" + << " config=" << data->args.hipDeviceSetSharedMemConfig.config + << ")"; + break; + case HIP_API_ID_hipDeviceComputeCapability: + oss << "hipDeviceComputeCapability(" + << " major=" << data->args.hipDeviceComputeCapability.major << "," + << " minor=" << data->args.hipDeviceComputeCapability.minor << "," + << " device=" << data->args.hipDeviceComputeCapability.device + << ")"; + break; + case HIP_API_ID_hipRegisterApiCallback: + oss << "hipRegisterApiCallback(" + << " id=" << data->args.hipRegisterApiCallback.id << "," + << " fun=" << data->args.hipRegisterApiCallback.fun << "," + << " arg=" << data->args.hipRegisterApiCallback.arg + << ")"; + break; + case HIP_API_ID_hipDeviceGet: + oss << "hipDeviceGet(" + << " device=" << data->args.hipDeviceGet.device << "," + << " ordinal=" << data->args.hipDeviceGet.ordinal + << ")"; + break; + case HIP_API_ID_hipProfilerStart: + oss << "hipProfilerStart(" + << ")"; + break; + case HIP_API_ID_hipCtxSetCacheConfig: + oss << "hipCtxSetCacheConfig(" + << " cacheConfig=" << data->args.hipCtxSetCacheConfig.cacheConfig + << ")"; + break; + case HIP_API_ID_hipFuncSetCacheConfig: + oss << "hipFuncSetCacheConfig(" + << " func=" << data->args.hipFuncSetCacheConfig.func << "," + << " config=" << data->args.hipFuncSetCacheConfig.config + << ")"; + break; + case HIP_API_ID_hipMemcpyPeerAsync: + oss << "hipMemcpyPeerAsync(" + << " dst=" << data->args.hipMemcpyPeerAsync.dst << "," + << " dstDeviceId=" << data->args.hipMemcpyPeerAsync.dstDeviceId << "," + << " 
src=" << data->args.hipMemcpyPeerAsync.src << "," + << " srcDevice=" << data->args.hipMemcpyPeerAsync.srcDevice << "," + << " sizeBytes=" << data->args.hipMemcpyPeerAsync.sizeBytes << "," + << " stream=" << data->args.hipMemcpyPeerAsync.stream + << ")"; + break; + case HIP_API_ID_hipEventElapsedTime: + oss << "hipEventElapsedTime(" + << " ms=" << data->args.hipEventElapsedTime.ms << "," + << " start=" << data->args.hipEventElapsedTime.start << "," + << " stop=" << data->args.hipEventElapsedTime.stop + << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxReset: + oss << "hipDevicePrimaryCtxReset(" + << " dev=" << data->args.hipDevicePrimaryCtxReset.dev + << ")"; + break; + case HIP_API_ID_hipEventDestroy: + oss << "hipEventDestroy(" + << " event=" << data->args.hipEventDestroy.event + << ")"; + break; + case HIP_API_ID_hipCtxPopCurrent: + oss << "hipCtxPopCurrent(" + << " ctx=" << data->args.hipCtxPopCurrent.ctx + << ")"; + break; + case HIP_API_ID_hipHostGetFlags: + oss << "hipHostGetFlags(" + << " flagsPtr=" << data->args.hipHostGetFlags.flagsPtr << "," + << " hostPtr=" << data->args.hipHostGetFlags.hostPtr + << ")"; + break; + case HIP_API_ID_hipHostMalloc: + oss << "hipHostMalloc(" + << " ptr=" << data->args.hipHostMalloc.ptr << "," + << " size=" << data->args.hipHostMalloc.size << "," + << " flags=" << data->args.hipHostMalloc.flags + << ")"; + break; + case HIP_API_ID_hipDriverGetVersion: + oss << "hipDriverGetVersion(" + << " driverVersion=" << data->args.hipDriverGetVersion.driverVersion + << ")"; + break; + case HIP_API_ID_hipMemGetInfo: + oss << "hipMemGetInfo(" + << " free=" << data->args.hipMemGetInfo.free << "," + << " total=" << data->args.hipMemGetInfo.total + << ")"; + break; + case HIP_API_ID_hipDeviceReset: + oss << "hipDeviceReset(" + << ")"; + break; + case HIP_API_ID_hipMemset: + oss << "hipMemset(" + << " dst=" << data->args.hipMemset.dst << "," + << " value=" << data->args.hipMemset.value << "," + << " sizeBytes=" << 
data->args.hipMemset.sizeBytes + << ")"; + break; + case HIP_API_ID_hipMemsetD8: + oss << "hipMemsetD8(" + << " dest=" << data->args.hipMemsetD8.dest << "," + << " value=" << data->args.hipMemsetD8.value << "," + << " sizeBytes=" << data->args.hipMemsetD8.sizeBytes + << ")"; + break; + case HIP_API_ID_hipHostRegister: + oss << "hipHostRegister(" + << " hostPtr=" << data->args.hipHostRegister.hostPtr << "," + << " sizeBytes=" << data->args.hipHostRegister.sizeBytes << "," + << " flags=" << data->args.hipHostRegister.flags + << ")"; + break; + case HIP_API_ID_hipCtxSetSharedMemConfig: + oss << "hipCtxSetSharedMemConfig(" + << " config=" << data->args.hipCtxSetSharedMemConfig.config + << ")"; + break; + case HIP_API_ID_hipArray3DCreate: + oss << "hipArray3DCreate(" + << " array=" << data->args.hipArray3DCreate.array << "," + << " pAllocateArray=" << data->args.hipArray3DCreate.pAllocateArray + << ")"; + break; + case HIP_API_ID_hipIpcOpenMemHandle: + oss << "hipIpcOpenMemHandle(" + << " devPtr=" << data->args.hipIpcOpenMemHandle.devPtr << "," + << " handle=" << data->args.hipIpcOpenMemHandle.handle << "," + << " flags=" << data->args.hipIpcOpenMemHandle.flags + << ")"; + break; + case HIP_API_ID_hipGetLastError: + oss << "hipGetLastError(" + << ")"; + break; + case HIP_API_ID_hipCtxDestroy: + oss << "hipCtxDestroy(" + << " ctx=" << data->args.hipCtxDestroy.ctx + << ")"; + break; + case HIP_API_ID_hipDeviceGetSharedMemConfig: + oss << "hipDeviceGetSharedMemConfig(" + << " pConfig=" << data->args.hipDeviceGetSharedMemConfig.pConfig + << ")"; + break; + case HIP_API_ID_hipRegisterActivityCallback: + oss << "hipRegisterActivityCallback(" + << " id=" << data->args.hipRegisterActivityCallback.id << "," + << " fun=" << data->args.hipRegisterActivityCallback.fun << "," + << " arg=" << data->args.hipRegisterActivityCallback.arg + << ")"; + break; + case HIP_API_ID_hipSetDeviceFlags: + oss << "hipSetDeviceFlags(" + << " flags=" << data->args.hipSetDeviceFlags.flags + << ")"; + 
break; + case HIP_API_ID_hipFree: + oss << "hipFree(" + << " ptr=" << data->args.hipFree.ptr + << ")"; + break; + case HIP_API_ID_hipDeviceGetAttribute: + oss << "hipDeviceGetAttribute(" + << " pi=" << data->args.hipDeviceGetAttribute.pi << "," + << " attr=" << data->args.hipDeviceGetAttribute.attr << "," + << " deviceId=" << data->args.hipDeviceGetAttribute.deviceId + << ")"; + break; + case HIP_API_ID_hipMemcpyDtoH: + oss << "hipMemcpyDtoH(" + << " dst=" << data->args.hipMemcpyDtoH.dst << "," + << " src=" << data->args.hipMemcpyDtoH.src << "," + << " sizeBytes=" << data->args.hipMemcpyDtoH.sizeBytes + << ")"; + break; + case HIP_API_ID_hipCtxDisablePeerAccess: + oss << "hipCtxDisablePeerAccess(" + << " peerCtx=" << data->args.hipCtxDisablePeerAccess.peerCtx + << ")"; + break; + case HIP_API_ID_hipDeviceGetByPCIBusId: + oss << "hipDeviceGetByPCIBusId(" + << " device=" << data->args.hipDeviceGetByPCIBusId.device << "," + << " pciBusId=" << data->args.hipDeviceGetByPCIBusId.pciBusId + << ")"; + break; + case HIP_API_ID_hipIpcGetMemHandle: + oss << "hipIpcGetMemHandle(" + << " handle=" << data->args.hipIpcGetMemHandle.handle << "," + << " devPtr=" << data->args.hipIpcGetMemHandle.devPtr + << ")"; + break; + case HIP_API_ID_hipMemcpyHtoDAsync: + oss << "hipMemcpyHtoDAsync(" + << " dst=" << data->args.hipMemcpyHtoDAsync.dst << "," + << " src=" << data->args.hipMemcpyHtoDAsync.src << "," + << " sizeBytes=" << data->args.hipMemcpyHtoDAsync.sizeBytes << "," + << " stream=" << data->args.hipMemcpyHtoDAsync.stream + << ")"; + break; + case HIP_API_ID_hipCtxGetDevice: + oss << "hipCtxGetDevice(" + << " device=" << data->args.hipCtxGetDevice.device + << ")"; + break; + case HIP_API_ID_hipMemset3D: + oss << "hipMemset3D(" + << " pitchedDevPtr=" << data->args.hipMemset3D.pitchedDevPtr << "," + << " value=" << data->args.hipMemset3D.value << "," + << " extent=" << data->args.hipMemset3D.extent + << ")"; + break; + case HIP_API_ID_hipModuleLoadData: + oss << "hipModuleLoadData(" 
+ << " module=" << data->args.hipModuleLoadData.module << "," + << " image=" << data->args.hipModuleLoadData.image + << ")"; + break; + case HIP_API_ID_hipDeviceTotalMem: + oss << "hipDeviceTotalMem(" + << " bytes=" << data->args.hipDeviceTotalMem.bytes << "," + << " device=" << data->args.hipDeviceTotalMem.device + << ")"; + break; + case HIP_API_ID_hipCtxSetCurrent: + oss << "hipCtxSetCurrent(" + << " ctx=" << data->args.hipCtxSetCurrent.ctx + << ")"; + break; + case HIP_API_ID_hipMallocHost: + oss << "hipMallocHost(" + << " ptr=" << data->args.hipMallocHost.ptr << "," + << " size=" << data->args.hipMallocHost.size + << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxRetain: + oss << "hipDevicePrimaryCtxRetain(" + << " pctx=" << data->args.hipDevicePrimaryCtxRetain.pctx << "," + << " dev=" << data->args.hipDevicePrimaryCtxRetain.dev + << ")"; + break; + case HIP_API_ID_hipDeviceDisablePeerAccess: + oss << "hipDeviceDisablePeerAccess(" + << " peerDeviceId=" << data->args.hipDeviceDisablePeerAccess.peerDeviceId + << ")"; + break; + case HIP_API_ID_hipStreamCreateWithFlags: + oss << "hipStreamCreateWithFlags(" + << " stream=" << data->args.hipStreamCreateWithFlags.stream << "," + << " flags=" << data->args.hipStreamCreateWithFlags.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyFromArray: + oss << "hipMemcpyFromArray(" + << " dst=" << data->args.hipMemcpyFromArray.dst << "," + << " srcArray=" << data->args.hipMemcpyFromArray.srcArray << "," + << " wOffset=" << data->args.hipMemcpyFromArray.wOffset << "," + << " hOffset=" << data->args.hipMemcpyFromArray.hOffset << "," + << " count=" << data->args.hipMemcpyFromArray.count << "," + << " kind=" << data->args.hipMemcpyFromArray.kind + << ")"; + break; + case HIP_API_ID_hipMemcpy2DAsync: + oss << "hipMemcpy2DAsync(" + << " dst=" << data->args.hipMemcpy2DAsync.dst << "," + << " dpitch=" << data->args.hipMemcpy2DAsync.dpitch << "," + << " src=" << data->args.hipMemcpy2DAsync.src << "," + << " spitch=" << 
data->args.hipMemcpy2DAsync.spitch << "," + << " width=" << data->args.hipMemcpy2DAsync.width << "," + << " height=" << data->args.hipMemcpy2DAsync.height << "," + << " kind=" << data->args.hipMemcpy2DAsync.kind << "," + << " stream=" << data->args.hipMemcpy2DAsync.stream + << ")"; + break; + case HIP_API_ID_hipFuncGetAttributes: + oss << "hipFuncGetAttributes(" + << " attr=" << data->args.hipFuncGetAttributes.attr << "," + << " func=" << data->args.hipFuncGetAttributes.func + << ")"; + break; + case HIP_API_ID_hipEventCreateWithFlags: + oss << "hipEventCreateWithFlags(" + << " event=" << data->args.hipEventCreateWithFlags.event << "," + << " flags=" << data->args.hipEventCreateWithFlags.flags + << ")"; + break; + case HIP_API_ID_hipStreamQuery: + oss << "hipStreamQuery(" + << " stream=" << data->args.hipStreamQuery.stream + << ")"; + break; + case HIP_API_ID_hipDeviceGetPCIBusId: + oss << "hipDeviceGetPCIBusId(" + << " pciBusId=" << data->args.hipDeviceGetPCIBusId.pciBusId << "," + << " len=" << data->args.hipDeviceGetPCIBusId.len << "," + << " device=" << data->args.hipDeviceGetPCIBusId.device + << ")"; + break; + case HIP_API_ID_hipMemcpy: + oss << "hipMemcpy(" + << " dst=" << data->args.hipMemcpy.dst << "," + << " src=" << data->args.hipMemcpy.src << "," + << " sizeBytes=" << data->args.hipMemcpy.sizeBytes << "," + << " kind=" << data->args.hipMemcpy.kind + << ")"; + break; + case HIP_API_ID_hipPeekAtLastError: + oss << "hipPeekAtLastError(" + << ")"; + break; + case HIP_API_ID_hipHostAlloc: + oss << "hipHostAlloc(" + << " ptr=" << data->args.hipHostAlloc.ptr << "," + << " size=" << data->args.hipHostAlloc.size << "," + << " flags=" << data->args.hipHostAlloc.flags + << ")"; + break; + case HIP_API_ID_hipStreamAddCallback: + oss << "hipStreamAddCallback(" + << " stream=" << data->args.hipStreamAddCallback.stream << "," + << " callback=" << data->args.hipStreamAddCallback.callback << "," + << " userData=" << data->args.hipStreamAddCallback.userData << "," + << " 
flags=" << data->args.hipStreamAddCallback.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyToArray: + oss << "hipMemcpyToArray(" + << " dst=" << data->args.hipMemcpyToArray.dst << "," + << " wOffset=" << data->args.hipMemcpyToArray.wOffset << "," + << " hOffset=" << data->args.hipMemcpyToArray.hOffset << "," + << " src=" << data->args.hipMemcpyToArray.src << "," + << " count=" << data->args.hipMemcpyToArray.count << "," + << " kind=" << data->args.hipMemcpyToArray.kind + << ")"; + break; + case HIP_API_ID_hipDeviceSynchronize: + oss << "hipDeviceSynchronize(" + << ")"; + break; + case HIP_API_ID_hipDeviceGetCacheConfig: + oss << "hipDeviceGetCacheConfig(" + << " cacheConfig=" << data->args.hipDeviceGetCacheConfig.cacheConfig + << ")"; + break; + case HIP_API_ID_hipMalloc3D: + oss << "hipMalloc3D(" + << " pitchedDevPtr=" << data->args.hipMalloc3D.pitchedDevPtr << "," + << " extent=" << data->args.hipMalloc3D.extent + << ")"; + break; + case HIP_API_ID_hipPointerGetAttributes: + oss << "hipPointerGetAttributes(" + << " attributes=" << data->args.hipPointerGetAttributes.attributes << "," + << " ptr=" << data->args.hipPointerGetAttributes.ptr + << ")"; + break; + case HIP_API_ID_hipMemsetAsync: + oss << "hipMemsetAsync(" + << " dst=" << data->args.hipMemsetAsync.dst << "," + << " value=" << data->args.hipMemsetAsync.value << "," + << " sizeBytes=" << data->args.hipMemsetAsync.sizeBytes << "," + << " stream=" << data->args.hipMemsetAsync.stream + << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbol: + oss << "hipMemcpyToSymbol(" + << " symbolName=" << data->args.hipMemcpyToSymbol.symbolName << "," + << " src=" << data->args.hipMemcpyToSymbol.src << "," + << " sizeBytes=" << data->args.hipMemcpyToSymbol.sizeBytes << "," + << " offset=" << data->args.hipMemcpyToSymbol.offset << "," + << " kind=" << data->args.hipMemcpyToSymbol.kind + << ")"; + break; + case HIP_API_ID_hipCtxPushCurrent: + oss << "hipCtxPushCurrent(" + << " ctx=" << data->args.hipCtxPushCurrent.ctx + 
<< ")"; + break; + case HIP_API_ID_hipMemcpyPeer: + oss << "hipMemcpyPeer(" + << " dst=" << data->args.hipMemcpyPeer.dst << "," + << " dstDeviceId=" << data->args.hipMemcpyPeer.dstDeviceId << "," + << " src=" << data->args.hipMemcpyPeer.src << "," + << " srcDeviceId=" << data->args.hipMemcpyPeer.srcDeviceId << "," + << " sizeBytes=" << data->args.hipMemcpyPeer.sizeBytes + << ")"; + break; + case HIP_API_ID_hipEventSynchronize: + oss << "hipEventSynchronize(" + << " event=" << data->args.hipEventSynchronize.event + << ")"; + break; + case HIP_API_ID_hipMemcpyDtoDAsync: + oss << "hipMemcpyDtoDAsync(" + << " dst=" << data->args.hipMemcpyDtoDAsync.dst << "," + << " src=" << data->args.hipMemcpyDtoDAsync.src << "," + << " sizeBytes=" << data->args.hipMemcpyDtoDAsync.sizeBytes << "," + << " stream=" << data->args.hipMemcpyDtoDAsync.stream + << ")"; + break; + case HIP_API_ID_hipCtxEnablePeerAccess: + oss << "hipCtxEnablePeerAccess(" + << " peerCtx=" << data->args.hipCtxEnablePeerAccess.peerCtx << "," + << " flags=" << data->args.hipCtxEnablePeerAccess.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyDtoHAsync: + oss << "hipMemcpyDtoHAsync(" + << " dst=" << data->args.hipMemcpyDtoHAsync.dst << "," + << " src=" << data->args.hipMemcpyDtoHAsync.src << "," + << " sizeBytes=" << data->args.hipMemcpyDtoHAsync.sizeBytes << "," + << " stream=" << data->args.hipMemcpyDtoHAsync.stream + << ")"; + break; + case HIP_API_ID_hipModuleLaunchKernel: + oss << "hipModuleLaunchKernel(" + << " f=" << data->args.hipModuleLaunchKernel.f << "," + << " gridDimX=" << data->args.hipModuleLaunchKernel.gridDimX << "," + << " gridDimY=" << data->args.hipModuleLaunchKernel.gridDimY << "," + << " gridDimZ=" << data->args.hipModuleLaunchKernel.gridDimZ << "," + << " blockDimX=" << data->args.hipModuleLaunchKernel.blockDimX << "," + << " blockDimY=" << data->args.hipModuleLaunchKernel.blockDimY << "," + << " blockDimZ=" << data->args.hipModuleLaunchKernel.blockDimZ << "," + << " sharedMemBytes=" << 
data->args.hipModuleLaunchKernel.sharedMemBytes << "," + << " stream=" << data->args.hipModuleLaunchKernel.stream << "," + << " kernelParams=" << data->args.hipModuleLaunchKernel.kernelParams << "," + << " extra=" << data->args.hipModuleLaunchKernel.extra + << ")"; + break; + case HIP_API_ID_hipHccModuleLaunchKernel: + oss << "hipHccModuleLaunchKernel(" + << " f=" << data->args.hipHccModuleLaunchKernel.f << "," + << ")"; + break; + case HIP_API_ID_hipModuleGetTexRef: + oss << "hipModuleGetTexRef(" + << " texRef=" << data->args.hipModuleGetTexRef.texRef << "," + << " hmod=" << data->args.hipModuleGetTexRef.hmod << "," + << " name=" << data->args.hipModuleGetTexRef.name + << ")"; + break; + case HIP_API_ID_hipRemoveActivityCallback: + oss << "hipRemoveActivityCallback(" + << " id=" << data->args.hipRemoveActivityCallback.id + << ")"; + break; + case HIP_API_ID_hipDeviceGetLimit: + oss << "hipDeviceGetLimit(" + << " pValue=" << data->args.hipDeviceGetLimit.pValue << "," + << " limit=" << data->args.hipDeviceGetLimit.limit + << ")"; + break; + case HIP_API_ID_hipModuleLoadDataEx: + oss << "hipModuleLoadDataEx(" + << " module=" << data->args.hipModuleLoadDataEx.module << "," + << " image=" << data->args.hipModuleLoadDataEx.image << "," + << " numOptions=" << data->args.hipModuleLoadDataEx.numOptions << "," + << " options=" << data->args.hipModuleLoadDataEx.options << "," + << " optionValues=" << data->args.hipModuleLoadDataEx.optionValues + << ")"; + break; + case HIP_API_ID_hipRuntimeGetVersion: + oss << "hipRuntimeGetVersion(" + << " runtimeVersion=" << data->args.hipRuntimeGetVersion.runtimeVersion + << ")"; + break; + case HIP_API_ID_hipGetDeviceProperties: + oss << "hipGetDeviceProperties(" + << " prop=" << data->args.hipGetDeviceProperties.prop << "," + << " deviceId=" << data->args.hipGetDeviceProperties.deviceId + << ")"; + break; + case HIP_API_ID_hipFreeArray: + oss << "hipFreeArray(" + << " array=" << data->args.hipFreeArray.array + << ")"; + break; + case 
HIP_API_ID_hipDevicePrimaryCtxRelease: + oss << "hipDevicePrimaryCtxRelease(" + << " dev=" << data->args.hipDevicePrimaryCtxRelease.dev + << ")"; + break; + case HIP_API_ID_hipHostGetDevicePointer: + oss << "hipHostGetDevicePointer(" + << " devPtr=" << data->args.hipHostGetDevicePointer.devPtr << "," + << " hstPtr=" << data->args.hipHostGetDevicePointer.hstPtr << "," + << " flags=" << data->args.hipHostGetDevicePointer.flags + << ")"; + break; + case HIP_API_ID_hipMemcpyParam2D: + oss << "hipMemcpyParam2D(" + << " pCopy=" << data->args.hipMemcpyParam2D.pCopy + << ")"; + break; + case HIP_API_ID_hipConfigureCall: + oss << "hipConfigureCall(" + << " gridDim=" << data->args.hipConfigureCall.gridDim << "," + << " blockDim=" << data->args.hipConfigureCall.blockDim << "," + << " sharedMem=" << data->args.hipConfigureCall.sharedMem << "," + << " stream=" << data->args.hipConfigureCall.stream + << ")"; + break; + case HIP_API_ID_hipModuleGetFunction: + oss << "hipModuleGetFunction(" + << " function=" << data->args.hipModuleGetFunction.function << "," + << " module=" << data->args.hipModuleGetFunction.module << "," + << " kname=" << data->args.hipModuleGetFunction.kname + << ")"; + break; + case HIP_API_ID_hipGetDevice: + oss << "hipGetDevice(" + << " deviceId=" << data->args.hipGetDevice.deviceId + << ")"; + break; + case HIP_API_ID_hipGetDeviceCount: + oss << "hipGetDeviceCount(" + << " count=" << data->args.hipGetDeviceCount.count + << ")"; + break; + default: oss << "unknown"; + }; + return strdup(oss.str().c_str()); +}; +#endif + +#endif // _HIP_CBSTR diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h b/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h new file mode 100644 index 000000000..60d145c88 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h @@ -0,0 +1,481 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_runtime.h + * @brief Contains definitions of APIs for HIP runtime. 
+ */ + +//#pragma once +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H + +#if defined(__HCC__) +#define __HCC_OR_HIP_CLANG__ 1 +#define __HCC_ONLY__ 1 +#define __HIP_CLANG_ONLY__ 0 +#elif defined(__clang__) && defined(__HIP__) +#define __HCC_OR_HIP_CLANG__ 1 +#define __HCC_ONLY__ 0 +#define __HIP_CLANG_ONLY__ 1 +#else +#define __HCC_OR_HIP_CLANG__ 0 +#define __HCC_ONLY__ 0 +#define __HIP_CLANG_ONLY__ 0 +#endif + +//--- +// Top part of file can be compiled with any compiler + +//#include +#if __cplusplus +#include +#else +#include +#include +#include +#endif //__cplusplus + +#if __HCC_OR_HIP_CLANG__ + +#define CUDA_SUCCESS hipSuccess + +#include +#endif // __HCC_OR_HIP_CLANG__ + +#if __HCC__ +// define HIP_ENABLE_PRINTF to enable printf +#ifdef HIP_ENABLE_PRINTF +#define HCC_ENABLE_ACCELERATOR_PRINTF 1 +#endif + +//--- +// Remainder of this file only compiles with HCC +#if defined __HCC__ +#include "grid_launch.h" +#include "hc_printf.hpp" +// TODO-HCC-GL - change this to typedef. +// typedef grid_launch_parm hipLaunchParm ; + +#if GENERIC_GRID_LAUNCH == 0 +#define hipLaunchParm grid_launch_parm +#else +namespace hip_impl { +struct Empty_launch_parm {}; +} // namespace hip_impl +#define hipLaunchParm hip_impl::Empty_launch_parm +#endif // GENERIC_GRID_LAUNCH + +#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1 +#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. +#error(HCC must support GRID_LAUNCH_20) +#endif // GRID_LAUNCH_VERSION + +#endif // HCC + +#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__ +#include "grid_launch_GGL.hpp" +#endif // GENERIC_GRID_LAUNCH + +#endif // HCC + +#if __HCC_OR_HIP_CLANG__ +extern int HIP_TRACE_API; + +#ifdef __cplusplus +#include +#endif +#include +#include +#include +#include +#include +#if __HCC__ + #include +#endif +// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. 
+#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) +#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__ +#endif + +// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call. +#if __HIP_DEVICE_COMPILE__ == 1 +#undef assert +#define assert(COND) \ + { \ + if (!(COND)) { \ + abort(); \ + } \ + } +#endif + + +// Feature tests: +#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ +// Device compile and not host compile: + +// 32-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) + +// 64-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (1) + +// warp cross-lane operations: +#define __HIP_ARCH_HAS_WARP_VOTE__ (1) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (1) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (1) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) + +#endif /* Device feature flags */ + + +#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) +#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \ + amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) +#define select_impl_(_1, _2, impl_, ...) impl_ +#define __launch_bounds__(...) 
\ + select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) + +// Detect if we are compiling C++ mode or C mode +#if defined(__cplusplus) +#define __HCC_CPP__ +#elif defined(__STDC_VERSION__) +#define __HCC_C__ +#endif + +__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } + +#if __HIP_ARCH_GFX701__ == 0 + +__device__ unsigned __hip_ds_bpermute(int index, unsigned src); +__device__ float __hip_ds_bpermutef(int index, float src); +__device__ unsigned __hip_ds_permute(int index, unsigned src); +__device__ float __hip_ds_permutef(int index, float src); + +__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern); +__device__ float __hip_ds_swizzlef(float src, int pattern); + +__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl); + +#endif //__HIP_ARCH_GFX803__ == 1 + +#endif // __HCC_OR_HIP_CLANG__ + +#if defined __HCC__ + +template < + typename std::common_type::type f> +class Coordinates { + using R = decltype(f(0)); + + struct X { + __device__ operator R() const { return f(0); } + }; + struct Y { + __device__ operator R() const { return f(1); } + }; + struct Z { + __device__ operator R() const { return f(2); } + }; + + public: + static constexpr X x{}; + static constexpr Y y{}; + static constexpr Z z{}; +}; + +static constexpr Coordinates blockDim; +static constexpr Coordinates blockIdx; +static constexpr Coordinates gridDim; +static constexpr Coordinates threadIdx; + +#define hipThreadIdx_x (hc_get_workitem_id(0)) +#define hipThreadIdx_y (hc_get_workitem_id(1)) +#define hipThreadIdx_z (hc_get_workitem_id(2)) + +#define hipBlockIdx_x (hc_get_group_id(0)) +#define hipBlockIdx_y (hc_get_group_id(1)) +#define hipBlockIdx_z (hc_get_group_id(2)) + +#define hipBlockDim_x (hc_get_group_size(0)) +#define hipBlockDim_y (hc_get_group_size(1)) +#define hipBlockDim_z (hc_get_group_size(2)) + +#define hipGridDim_x (hc_get_num_groups(0)) +#define hipGridDim_y (hc_get_num_groups(1)) 
+#define hipGridDim_z (hc_get_num_groups(2)) + +#endif // defined __HCC__ +#if __HCC_OR_HIP_CLANG__ +extern "C" __device__ void* __hip_malloc(size_t); +extern "C" __device__ void* __hip_free(void* ptr); + +static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } +static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } + +#if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF) +template +static inline __device__ void printf(const char* format, All... all) { + hc::printf(format, all...); +} +#elif defined(__HCC_ACCELERATOR__) || __HIP__ +template +static inline __device__ void printf(const char* format, All... all) {} +#endif + +#endif //__HCC_OR_HIP_CLANG__ + +#ifdef __HCC__ + +#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) + +#define HIP_KERNEL_NAME(...) (__VA_ARGS__) +#define HIP_SYMBOL(X) #X + +#if defined __HCC_CPP__ +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, + grid_launch_parm* lp, const char* kernelNameStr); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, + grid_launch_parm* lp, const char* kernelNameStr); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, + grid_launch_parm* lp, const char* kernelNameStr); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, + grid_launch_parm* lp, const char* kernelNameStr); +extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp); + +#if GENERIC_GRID_LAUNCH == 0 +//#warning "Original hipLaunchKernel defined" +// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be +// either size_t or dim3 types +#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) 
\ + do { \ + grid_launch_parm lp; \ + lp.dynamic_group_mem_bytes = _groupMemBytes; \ + hipStream_t trueStream = \ + (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \ + _kernelName(lp, ##__VA_ARGS__); \ + ihipPostLaunchKernel(#_kernelName, trueStream, lp); \ + } while (0) +#endif // GENERIC_GRID_LAUNCH + +#elif defined(__HCC_C__) + +// TODO - develop C interface. + +#endif //__HCC_CPP__ + +/** + * @defgroup HIP-ENV HIP Environment Variables + * @{ + */ +// extern int HIP_PRINT_ENV ; ///< Print all HIP-related environment variables. +// extern int HIP_TRACE_API; ///< Trace HIP APIs. +// extern int HIP_LAUNCH_BLOCKING ; ///< Make all HIP APIs host-synchronous + +/** + * @} + */ + + +// End doxygen API: +/** + * @} + */ + +// +// hip-clang functions +// +#elif defined(__clang__) && defined(__HIP__) + +#define HIP_KERNEL_NAME(...) __VA_ARGS__ +#define HIP_SYMBOL(X) #X + +typedef int hipLaunchParm; + +template +inline void hipLaunchKernelGGL(F&& kernelName, const dim3& numblocks, const dim3& numthreads, + unsigned memperblock, hipStream_t streamId, Args... args) { + kernelName<<>>(args...); +} + +template +inline void hipLaunchKernel(F&& kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t groupMemBytes, hipStream_t stream, Args... 
args) { + hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, + std::move(args)...); +} + +#include + +#pragma push_macro("__DEVICE__") +#define __DEVICE__ static __device__ __forceinline__ + +extern "C" __device__ size_t __ockl_get_local_id(uint); +__DEVICE__ uint __hip_get_thread_idx_x() { return __ockl_get_local_id(0); } +__DEVICE__ uint __hip_get_thread_idx_y() { return __ockl_get_local_id(1); } +__DEVICE__ uint __hip_get_thread_idx_z() { return __ockl_get_local_id(2); } + +extern "C" __device__ size_t __ockl_get_group_id(uint); +__DEVICE__ uint __hip_get_block_idx_x() { return __ockl_get_group_id(0); } +__DEVICE__ uint __hip_get_block_idx_y() { return __ockl_get_group_id(1); } +__DEVICE__ uint __hip_get_block_idx_z() { return __ockl_get_group_id(2); } + +extern "C" __device__ size_t __ockl_get_local_size(uint); +__DEVICE__ uint __hip_get_block_dim_x() { return __ockl_get_local_size(0); } +__DEVICE__ uint __hip_get_block_dim_y() { return __ockl_get_local_size(1); } +__DEVICE__ uint __hip_get_block_dim_z() { return __ockl_get_local_size(2); } + +extern "C" __device__ size_t __ockl_get_num_groups(uint); +__DEVICE__ uint __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); } +__DEVICE__ uint __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); } +__DEVICE__ uint __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); } + +#define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \ + __declspec(property(get = __get_##DIMENSION)) uint DIMENSION; \ + __DEVICE__ uint __get_##DIMENSION(void) { \ + return FUNCTION; \ + } + +struct __hip_builtin_threadIdx_t { + __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x()); + __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y()); + __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z()); +}; + +struct __hip_builtin_blockIdx_t { + __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x()); + __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y()); + __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z()); +}; + +struct 
__hip_builtin_blockDim_t { + __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x()); + __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y()); + __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z()); +}; + +struct __hip_builtin_gridDim_t { + __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x()); + __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y()); + __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z()); +}; + +#undef __HIP_DEVICE_BUILTIN +#pragma pop_macro("__DEVICE__") + +extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx; +extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx; +extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim; +extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; + + +#define hipThreadIdx_x threadIdx.x +#define hipThreadIdx_y threadIdx.y +#define hipThreadIdx_z threadIdx.z + +#define hipBlockIdx_x blockIdx.x +#define hipBlockIdx_y blockIdx.y +#define hipBlockIdx_z blockIdx.z + +#define hipBlockDim_x blockDim.x +#define hipBlockDim_y blockDim.y +#define hipBlockDim_z blockDim.z + +#define hipGridDim_x gridDim.x +#define hipGridDim_y gridDim.y +#define hipGridDim_z gridDim.z + +#include + +#if __HIP_HCC_COMPAT_MODE__ +// Define HCC work item functions in terms of HIP builtin variables. 
+#pragma push_macro("__DEFINE_HCC_FUNC") +#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \ +inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \ + if (i==0) \ + return hip_var.x; \ + else if(i==1) \ + return hip_var.y; \ + else \ + return hip_var.z; \ +} + +__DEFINE_HCC_FUNC(workitem_id, threadIdx) +__DEFINE_HCC_FUNC(group_id, blockIdx) +__DEFINE_HCC_FUNC(group_size, blockDim) +__DEFINE_HCC_FUNC(num_groups, gridDim) +#pragma pop_macro("__DEFINE_HCC_FUNC") + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint); +inline __device__ __attribute__((always_inline)) uint +hc_get_workitem_absolute_id(int dim) +{ + return (uint)__ockl_get_global_id(dim); +} + +#endif + +// Support std::complex. +#pragma push_macro("__CUDA__") +#define __CUDA__ +#include <__clang_cuda_math_forward_declares.h> +#include <__clang_cuda_complex_builtins.h> +#include +#include +#include +#undef __CUDA__ +#pragma pop_macro("__CUDA__") + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent = nullptr, + hipEvent_t stopEvent = nullptr); + +#endif // defined(__clang__) && defined(__HIP__) + +#include + +#endif // HIP_HCC_DETAIL_RUNTIME_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h b/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h new file mode 100644 index 000000000..b6ae88729 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h @@ -0,0 +1,2860 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//#pragma once +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H +/** + * @file hcc_detail/hip_runtime_api.h + * @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or + * special language extensions (-hc mode) ; those functions in hip_runtime.h. 
+ */ +#include +#include + +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif + +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#define DEPRECATED(msg) __declspec(deprecated(msg)) +#else // !defined(_MSC_VER) +#define DEPRECATED(msg) __attribute__ ((deprecated(msg))) +#endif // !defined(_MSC_VER) + +#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list" + +#if defined(__HCC__) && (__hcc_workweek__ < 16155) +#error("This version of HIP requires a newer version of HCC."); +#endif + +#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) +#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) +#define HIP_LAUNCH_PARAM_END ((void*)0x03) + +#ifdef __cplusplus + #define __dparm(x) \ + = x +#else + #define __dparm(x) +#endif + +// Structure definitions: +#ifdef __cplusplus +extern "C" { +#endif + +//--- +// API-visible structures +typedef struct ihipCtx_t* hipCtx_t; + +// Note many APIs also use integer deviceIds as an alternative to the device pointer: +typedef int hipDevice_t; + +typedef struct ihipStream_t* hipStream_t; + +// TODO: IPC implementation + +#define hipIpcMemLazyEnablePeerAccess 0 + +#define HIP_IPC_HANDLE_SIZE 64 + +typedef struct hipIpcMemHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcMemHandle_t; + +// TODO: IPC event handle currently unsupported +struct ihipIpcEventHandle_t; +typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; + + +// END TODO + +typedef struct ihipModule_t* hipModule_t; + +typedef struct ihipModuleSymbol_t* hipFunction_t; + +typedef struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +} 
hipFuncAttributes; + +typedef struct ihipEvent_t* hipEvent_t; + +enum hipLimit_t { + hipLimitMallocHeapSize = 0x02, +}; + +/** + * @addtogroup GlobalDefs More + * @{ + */ +//! Flags that can be used with hipStreamCreateWithFlags +#define hipStreamDefault \ + 0x00 ///< Default stream creation flags. These are used with hipStreamCreate(). +#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream + + +//! Flags that can be used with hipEventCreateWithFlags: +#define hipEventDefault 0x0 ///< Default flags +#define hipEventBlockingSync \ + 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. +#define hipEventDisableTiming \ + 0x2 ///< Disable event's capability to record timing information. May improve performance. +#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. +#define hipEventReleaseToDevice \ + 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to + /// obtain more precise timings of commands between events. The flag is a no-op on + /// CUDA platforms. +#define hipEventReleaseToSystem \ + 0x80000000 /// < Use a system-scope release that when recording this event. This flag is + /// useful to make non-coherent host memory visible to the host. The flag is a + /// no-op on CUDA platforms. + + +//! Flags that can be used with hipHostMalloc +#define hipHostMallocDefault 0x0 +#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts. +#define hipHostMallocMapped \ + 0x2 ///< Map the allocation into the address space for the current device. The device pointer + ///< can be obtained with #hipHostGetDevicePointer. +#define hipHostMallocWriteCombined 0x4 +#define hipHostMallocCoherent \ + 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific + ///< allocation. +#define hipHostMallocNonCoherent \ + 0x80000000 ///< Allocate non-coherent memory. 
Overrides HIP_COHERENT_HOST_ALLOC for specific + ///< allocation. + + +//! Flags that can be used with hipHostRegister +#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable +#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. +#define hipHostRegisterMapped \ + 0x2 ///< Map the allocation into the address space for the current device. The device pointer + ///< can be obtained with #hipHostGetDevicePointer. +#define hipHostRegisterIoMemory 0x4 ///< Not supported. + + +#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield +#define hipDeviceScheduleSpin \ + 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and + ///< may consume more power. +#define hipDeviceScheduleYield \ + 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers + ///< power and is friendlier to other threads in the system. +#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 + +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x16 + +#define hipArrayDefault 0x00 ///< Default HIP array allocation flag +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 + +/* + * @brief hipJitOption + * @enum + * @ingroup Enumerations + */ +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, + hipJitOptionThreadsPerBlock, + hipJitOptionWallTime, + hipJitOptionInfoLogBuffer, + hipJitOptionInfoLogBufferSizeBytes, + hipJitOptionErrorLogBuffer, + hipJitOptionErrorLogBufferSizeBytes, + hipJitOptionOptimizationLevel, + hipJitOptionTargetFromContext, + hipJitOptionTarget, + hipJitOptionFallbackStrategy, + hipJitOptionGenerateDebugInfo, + hipJitOptionLogVerbose, + hipJitOptionGenerateLineInfo, + hipJitOptionCacheMode, + hipJitOptionSm3xOpt, + hipJitOptionFastCompile, + hipJitOptionNumOptions +} hipJitOption; + + +/** + * @warning On AMD 
devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncCache_t { + hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) + hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache + hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory + hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory +} hipFuncCache_t; + + +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipSharedMemConfig { + hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. + hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-byte intervals and performs best + ///< when adjacent threads access data 4 bytes apart. + hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best + ///< when adjacent threads access data 8 bytes apart. +} hipSharedMemConfig; + + +/** + * Struct for data in 3D + * + */ +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){}; +#endif +} dim3; + + +// Doxygen end group GlobalDefs +/** @} */ + + +//------------------------------------------------------------------------------------------------- + + +// The handle allows the async commands to use the stream even if the parent hipStream_t goes +// out-of-scope. +// typedef class ihipStream_t * hipStream_t; + + +/* + * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the + * surrounding hipEvent_t goes out-of-scope.
This is handy for cases where the hipEvent_t goes + * out-of-scope but the true event is being written by some async queue or device */ +// typedef struct hipEvent_t { +// struct ihipEvent_t *_handle; +//} hipEvent_t; + + +/** + * @defgroup API HIP API + * @{ + * + * Defines the HIP API. See the individual sections for more information. + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Device Device Management + * @{ + */ + +/** + * @brief Waits on all active streams on current device + * + * When this command is invoked, the host thread gets blocked until all the commands associated + * with streams associated with the device. HIP does not support multiple blocking modes (yet!). + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipDeviceReset + */ +hipError_t hipDeviceSynchronize(void); + + +/** + * @brief The state of current device is discarded and updated to a fresh state. + * + * Calling this function deletes all streams created, memory allocated, kernels running, events + * created. Make sure that no other thread is using the device or streams, memory, kernels, events + * associated with the current device. + * + * @returns #hipSuccess + * + * @see hipDeviceSynchronize + */ +hipError_t hipDeviceReset(void); + + +/** + * @brief Set default device to be used for subsequent hip API calls from this thread. + * + * @param[in] deviceId Valid device in range 0...hipGetDeviceCount(). + * + * Sets @p device as the default device for the calling host thread. Valid device id's are 0... + * (hipGetDeviceCount()-1). + * + * Many HIP APIs implicitly use the "default device" : + * + * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be + * allocated on device. + * - Any streams or events created from this host thread will be associated with device. 
+ * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device + * (unless a specific stream is specified, in which case the device associated with that stream will + * be used). + * + * This function may be called from any host thread. Multiple host threads may use the same device. + * This function does no synchronization with the previous or new device, and has very little + * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before + * making a HIP runtime call which uses the default device. + * + * The default device is stored in thread-local-storage for each thread. + * Thread-pool implementations may inherit the default device of the previous thread. A good + * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known + * standard device. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse + * + * @see hipGetDevice, hipGetDeviceCount + */ +hipError_t hipSetDevice(int deviceId); + + +/** + * @brief Return the default device id for the calling host thread. + * + * @param [out] device *device is written with the default device + * + * HIP maintains an default device for each thread using thread-local-storage. + * This device is used implicitly for HIP runtime APIs called by this thread. + * hipGetDevice returns in * @p device the default device for the calling host thread. + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipGetDevicesizeBytes + */ +hipError_t hipGetDevice(int* deviceId); + + +/** + * @brief Return number of compute-capable devices. + * + * @param [output] count Returns number of compute-capable devices. + * + * @returns #hipSuccess, #hipErrorNoDevice + * + * + * Returns in @p *count the number of devices that have ability to run compute commands. If there + * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. 
If 1 or more + * devices can be found, then hipGetDeviceCount returns #hipSuccess. + */ +hipError_t hipGetDeviceCount(int* count); + +/** + * @brief Query for a specific device attribute. + * + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); + +/** + * @brief Returns device properties. + * + * @param [out] prop written with device properties + * @param [in] deviceId which device to query for information + * + * @return #hipSuccess, #hipErrorInvalidDevice + * @bug HCC always returns 0 for maxThreadsPerMultiProcessor + * @bug HCC always returns 0 for regsPerBlock + * @bug HCC always returns 0 for l2CacheSize + * + * Populates @p prop with information for the specified device. + */ +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); + + +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig + * + * @returns #hipSuccess, #hipErrorInitializationError + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. + * + */ +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); + + +/** + * @brief Get the L1/Shared cache configuration. + * + * @param [out] cacheConfig written with the cache configuration + * + * @returns #hipSuccess, #hipErrorInitializationError + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures.
+ * + */ +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); + +/** + * @brief Get Resource limits of current device + * + * @param [out] pValue + * @param [in] limit + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * Note: Currently, only hipLimitMallocHeapSize is available + * + */ +hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); + + +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] config; + * + * @returns #hipSuccess, #hipErrorInitializationError + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. + * + */ +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); + +/** + * @brief Returns bank width of shared memory for current device + * + * @param [out] pConfig + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); + + +/** + * @brief The bank width of shared memory on current device is set + * + * @param [in] config + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); + +/** + * @brief The current device behavior is changed according the flags passed. + * + * @param [in] flags + * + * The schedule flags impact how HIP waits for the completion of a command running on a device. + * hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the + * work until the command completes. This offers the lowest latency, but will consume a CPU core + * and may increase power. 
hipDeviceScheduleYield : The HIP runtime will yield the CPU to + * system so that other tasks can use it. This may increase latency to detect the completion but + * will consume less power and is friendlier to other tasks in the system. + * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. + * hipDeviceScheduleAuto : Use a hueristic to select between Spin and Yield modes. If the + * number of HIP contexts is greater than the number of logical processors in the system, use Spin + * scheduling. Else use Yield scheduling. + * + * + * hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and + * the flag is ignored. hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess + * + * + */ +hipError_t hipSetDeviceFlags(unsigned flags); + +/** + * @brief Device which matches hipDeviceProp_t is returned + * + * @param [out] device ID + * @param [in] device properties pointer + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); + +// end doxygen Device +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Error Error Handling + * @{ + */ + +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. 
+ * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipGetLastError(void); + + +/** + * @brief Return last error returned by any HIP runtime API call. + * + * @return last error code returned by a HIP runtime call in the same host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread. Unlike hipGetLastError, this function does not reset the saved error code. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipPeekAtLastError(void); + + +/** + * @brief Return name of the specified error code in text form. + * + * @param hip_error Error code to convert to name. + * @return const char pointer to the NULL-terminated error name + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorName(hipError_t hip_error); + + +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param hipError Error code to convert to string. + * @return const char pointer to the NULL-terminated error string + * + * @warning : on HCC, this function returns the name of the error (same as hipGetErrorName) + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorString(hipError_t hipError); + +// end doxygen Error +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Stream Management + * @{ + * + * The following Stream APIs are not (yet) supported in HIP: + * - cudaStreamAttachMemAsync + */ + + +/** + * @brief Create an asynchronous stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream.
@p stream returns an opaque handle that can be used to + * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on + * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory + * used by the stream, the application must call hipStreamDestroy. + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreate(hipStream_t* stream); + + +/** + * @brief Create an asynchronous stream. + * + * @param[in, out] stream Pointer to new stream + * @param[in ] flags to control stream creation. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to + * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on + * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory + * used by the stream, the application must call hipStreamDestroy. Flags controls behavior of the + * stream. See #hipStreamDefault, #hipStreamNonBlocking. + * + * + * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ + +hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); + + +/** + * @brief Create an asynchronous stream with the specified priority. + * + * @param[in, out] stream Pointer to new stream + * @param[in ] flags to control stream creation. + * @param[in ] priority of the stream. Lower numbers represent higher priorities. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream with the specified priority. @p stream returns an opaque handle + * that can be used to reference the newly created stream in subsequent hipStream* commands.
The + * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope. + * To release the memory used by the stream, the application must call hipStreamDestroy. Flags controls + * behavior of the stream. See #hipStreamDefault, #hipStreamNonBlocking. + * + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ + +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); + + +/** + * @brief Returns numerical values that correspond to the least and greatest stream priority. + * + * @param[in, out] leastPriority pointer in which value corresponding to least priority is returned. + * @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned. + * + * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least + * and greatest stream priority respectively. Stream priorities follow a convention where lower numbers + * imply greater priorities. The range of meaningful stream priorities is given by + * [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value + * that is outside the meaningful range as specified by this API, the priority is automatically + * clamped to within the valid range. + */ + +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); + + +/** + * @brief Destroys the specified stream. + * + * @param[in] stream Stream to be destroyed. The handle is passed by value; nothing is written + * back to the caller. + * @return #hipSuccess #hipErrorInvalidResourceHandle + * + * Destroys the specified stream. + * + * If commands are still executing on the specified stream, some may complete execution before the + * queue is deleted. + * + * The queue may be destroyed while some commands are still inflight, or may wait for all commands + * queued to the stream before destroying it.
+ * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent, + * hipStreamSynchronize + */ +hipError_t hipStreamDestroy(hipStream_t stream); + + +/** + * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or + * #hipErrorNotReady if not. + * + * @param[in] stream stream to query + * + * @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidResourceHandle + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other + * host threads are sending work to the stream, the status may change immediately after the function + * is called. It is typically used for debug. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize, + * hipStreamDestroy + */ +hipError_t hipStreamQuery(hipStream_t stream); + + +/** + * @brief Wait for all commands in stream to complete. + * + * @param[in] stream stream identifier. + * + * @return #hipSuccess, #hipErrorInvalidResourceHandle + * + * This command is host-synchronous : the host will block until the specified stream is empty. + * + * This command follows standard null-stream semantics. Specifically, specifying the null stream + * will cause the command to wait for other streams on the same device to complete all pending + * operations. + * + * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active + * or blocking. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamSynchronize(hipStream_t stream); + + +/** + * @brief Make the specified compute stream wait for an event + * + * @param[in] stream stream to make wait. 
+ * @param[in] event event to wait on + * @param[in] flags control operation [must be 0] + * + * @return #hipSuccess, #hipErrorInvalidResourceHandle + * + * This function inserts a wait operation into the specified stream. + * All future work submitted to @p stream will wait until @p event reports completion before + * beginning execution. + * + * This function only waits for commands in the current stream to complete. Notably, this function + * does not implicitly wait for commands in the default stream to complete, even if the specified + * stream is created with hipStreamNonBlocking = 0. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags); + + +/** + * @brief Return flags associated with this stream. + * + * @param[in] stream stream to be queried + * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + * + * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidResourceHandle + * + * Return flags associated with this stream in *@p flags. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); + + +/** + * @brief Query the priority of a stream. + * + * @param[in] stream stream to be queried + * @param[in,out] priority Pointer to an integer in which the stream's priority is returned + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + * + * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidResourceHandle + * + * Query the priority of a stream. The priority is returned in *@p priority.
+ * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); + + +/** + * Stream CallBack struct + */ +typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); + +/** + * @brief Adds a callback to be called on the host after all currently enqueued + * items in the stream have completed. For each + * cudaStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @return #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); + + +// end doxygen Stream +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Event Event Management + * @{ + */ + +/** + * @brief Create an event with the specified flags + * + * @param[in,out] event Returns the newly created event. + * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, + #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess + + * #hipEventDefault : Default flag. The event will use active synchronization and will support + timing. 
Active synchronization provides lowest possible latency at the expense of dedicating a + CPU to poll on the event. + * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is + called on this event, the thread will block until the event completes. This can increase latency + for the synchronization but can result in lower power and more resources for other CPU threads. + * #hipEventDisableTiming : Disable recording of timing information. On ROCM platform, timing + information is always recorded and this flag has no performance benefit. + + * @warning On HCC platform, hipEventInterprocess support is under development. Use of this flag + will return an error. + * + * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, + #hipErrorLaunchFailure, #hipErrorMemoryAllocation + * + * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); + + +/** + * Create an event + * + * @param[in,out] event Returns the newly created event. + * + * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, + * #hipErrorLaunchFailure, #hipErrorMemoryAllocation + * + * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreate(hipEvent_t* event); + + +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError, + * #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested).
+ * + * Events which are recorded in a non-NULL stream will transition to + * from recording to "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on a an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +#ifdef __cplusplus +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); +#endif + +/** + * @brief Destroy the specified event. + * + * @param[in] event Event to destroy. + * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, + * #hipErrorLaunchFailure + * + * Releases memory associated with the event. If the event is recording but has not completed + * recording when hipEventDestroy() is called, the function will return immediately and the + * completion_future resources will be released later, when the hipDevice is synchronized. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, + * hipEventElapsedTime + * + * @returns #hipSuccess + */ +hipError_t hipEventDestroy(hipEvent_t event); + + +/** + * @brief Wait for an event to complete. + * + * This function will block until the event is ready, waiting for all previous work in the stream + * specified when event was recorded with hipEventRecord(). + * + * If hipEventRecord() has not been called on @p event, this function returns immediately. + * + * TODO-hcc - This function needs to support hipEventBlockingSync parameter. 
+ * + * @param[in] event Event on which to wait. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError, + * #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventElapsedTime + */ +hipError_t hipEventSynchronize(hipEvent_t event); + + +/** + * @brief Return the elapsed time between two events. + * + * @param[out] ms : Return time between start and stop in ms. + * @param[in] start : Start event. + * @param[in] stop : Stop event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidResourceHandle, + * #hipErrorInitializationError, #hipErrorLaunchFailure + * + * Computes the elapsed time between two events. Time is computed in ms, with + * a resolution of approximately 1 us. + * + * Events which are recorded in a NULL stream will block until all commands + * on all other streams complete execution, and then record the timestamp. + * + * Events which are recorded in a non-NULL stream will record their timestamp + * when they reach the head of the specified stream, after all previous + * commands in that stream have completed executing. Thus the time that + * the event recorded may be significantly after the host calls hipEventRecord(). + * + * If hipEventRecord() has not been called on either event, then #hipErrorInvalidResourceHandle is + * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been + * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at + * least one of the events), then #hipErrorNotReady is returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventSynchronize + */ +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); + + +/** + * @brief Query event status + * + * @param[in] event Event to query. 
+ * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue, + * #hipErrorInitializationError, #hipErrorLaunchFailure + * + * Query the status of the specified event. This function will return #hipSuccess if all + * commands in the appropriate stream (specified to hipEventRecord()) have completed, or if + * hipEventRecord() was not called on the event. If that work has not completed, then + * #hipErrorNotReady is returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, + * hipEventSynchronize, hipEventElapsedTime + */ +hipError_t hipEventQuery(hipEvent_t event); + + +// end doxygen Events +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Memory Management + * @{ + * + * The following CUDA APIs are not currently supported: + * - cudaMalloc3D + * - cudaMalloc3DArray + * - TODO - more 2D, 3D, array APIs here. + * + * + */ + + +/** + * @brief Return attributes for the specified pointer + * + * @param[out] attributes attributes for the specified pointer + * @param[in] ptr pointer to get attributes for + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); + +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
+ * + * @return #hipSuccess, #hipErrorMemoryAllocation, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipMalloc(void** ptr, size_t size); + +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipMallocHost(void** ptr, size_t size); + +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * @param[in] flags Type of host memory allocation + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @see hipSetDeviceFlags, hipHostFree + */ +hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); + +/** + * @brief Allocate device accessible page locked host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * @param[in] flags Type of host memory allocation + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); + +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc + * + * @param[out] devPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostMalloc + * @param[in] flags Flags to be passed for extension + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + * @see hipSetDeviceFlags, hipHostMalloc + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); + +/** + * @brief Return flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostMalloc + * @return #hipSuccess, #hipErrorInvalidValue + * + * @see hipHostMalloc + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); + +/** + * @brief Register host memory so it can be accessed from the current device. + * + * @param[in] hostPtr Pointer to host memory to be registered. + * @param[in] sizeBytes size of the host memory + * @param[in] flags See below. + * + * Flags: + * - #hipHostRegisterDefault Memory is Mapped and Portable + * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports + * one context so this is always assumed true. + * - #hipHostRegisterMapped Map the allocation into the address space for the current device. + * The device pointer can be obtained with #hipHostGetDevicePointer. + * + * + * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer. + * On many systems, the mapped device pointer will have a different value than the mapped host + * pointer. Applications must use the device pointer in device code, and the host pointer in host
+ * + * On some systems, registered memory is pinned. On some systems, registered memory may not be + * actually be pinned but uses OS or hardware facilities to all GPU access to the host memory. + * + * Developers are strongly encouraged to register memory blocks which are aligned to the host + * cache-line size. (typically 64-bytes but can be obtains from the CPUID instruction). + * + * If registering non-aligned pointers, the application must take care when register pointers from + * the same cache line on different devices. HIP's coarse-grained synchronization model does not + * guarantee correct results if different devices write to different parts of the same cache block - + * typically one of the writes will "win" and overwrite data from the other registered memory + * region. + * + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); + +/** + * @brief Un-register host pointer + * + * @param[in] hostPtr Host pointer previously registered with #hipHostRegister + * @return Error code + * + * @see hipHostRegister + */ +hipError_t hipHostUnregister(void* hostPtr); + +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alighnment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * + * @return Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); + +/** + * @brief Free memory allocated by the hcc hip memory allocation API. + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess + * @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated + * with hipHostMalloc) + * + * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipFree(void* ptr); + +/** + * @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated] + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + hipMalloc) + + * @deprecated use hipHostFree() instead + */ +DEPRECATED("use hipHostFree instead") +hipError_t hipFreeHost(void* ptr); + +/** + * @brief Free memory allocated by the hcc hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + * hipMalloc) + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipHostFree(void* ptr); + +/** + * @brief Copy data from src to dst. 
+ * + * It supports memory from host to device, + * device to host, device to device and host to host + * The src and dst must not overlap. + * + * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. For optimal peer-to-peer copies, the copy device + * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy + * agent as the current device and src/dest as the peerDevice argument. if this is not done, the + * hipMemcpy will still work, but will perform the copy using a staging buffer on the host. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] copyType Memory copy type + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknowni + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); + +/** + * @brief Copy data from Host to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * 
hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes); + +/** + * @brief Copy data from Device to Host + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); + +/** + * @brief Copy data from Device to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, 
hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); + +/** + * @brief Copy data from Host to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream); + +/** + * @brief Copy data from Device to Host asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * 
hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); + +/** + * @brief Copy data from Device to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); + + +/** + * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area + * pointed to by @p offset bytes from the start of symbol @p symbol. + * + * The memory areas may not overlap. Symbol can either be a variable that resides in global or + * constant memory space, or it can be a character string, naming a variable that resides in global + * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice + * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use + * hipErrorUnknown for now. 
+ * + * @param[in] symbolName - Symbol destination on device + * @param[in] src - Data being copy from + * @param[in] sizeBytes - Data size in bytes + * @param[in] offset - Offset from start of symbol in bytes + * @param[in] kind - Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol, + * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t sizeBytes, + size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); + + +/** + * @brief Copies the memory address of symbol @p symbolName to @p devPtr + * + * @param[in] symbolName - Symbol on device + * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound + * + * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName); + + +/** + * @brief Copies the size of symbol @p symbolName to @p size + * + * @param[in] symbolName - Symbol on device + * @param[out] size - Pointer to the size of the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound + * + * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); + + +/** + * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area + * pointed to by @p offset bytes from the start of symbol @p symbol 
+ * + * The memory areas may not overlap. Symbol can either be a variable that resides in global or + * constant memory space, or it can be a character string, naming a variable that resides in global + * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice + * hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before + * copy is complete. + * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use + * hipErrorUnknown for now. + * + * @param[in] symbolName - Symbol destination on device + * @param[in] src - Data being copy from + * @param[in] sizeBytes - Data size in bytes + * @param[in] offset - Offset from start of symbol in bytes + * @param[in] kind - Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol, + * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes, + size_t offset __dparm(0), hipMemcpyKind kind __dparm( hipMemcpyDeviceToHost )); + +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +/** + * @brief Copy data from src to dst asynchronously. + * + * @warning If host or dest are not pinned, the memory copy will be performed synchronously. 
For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. + * + * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. + * For hipMemcpy, the copy is always performed by the device associated with the specified stream. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a + * attached to the device where the src data is physically located. For optimal peer-to-peer copies, + * the copy device must be able to access the src and dst pointers (by calling + * hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice + * argument. if this is not done, the hipMemcpy will still work, but will perform the copy using a + * staging buffer on the host. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] accelerator_view Accelerator view which the copy is being enqueued + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, + * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); + +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. 
+ * + * @param[out] dst Data being filled + * @param[in] constant value to be set + * @param[in] sizeBytes Data size in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes); + +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] sizeBytes Data size in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes); + +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant + * byte value value. + * + * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value - Value to set for each byte of specified memory + * @param[in] sizeBytes - Size in bytes to set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); + +/** + * @brief Fills the memory area pointed to by dst with the constant value. 
+ * + * @param[out] dst Pointer to device memory + * @param[in] pitch - data size in bytes + * @param[in] value - constant value to be set + * @param[in] width + * @param[in] height + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); + +/** + * @brief Fills asynchronously the memory area pointed to by dst with the constant value. + * + * @param[in] dst Pointer to device memory + * @param[in] pitch - data size in bytes + * @param[in] value - constant value to be set + * @param[in] width + * @param[in] height + * @param[in] stream + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ + +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0)); + +/** + * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ); + +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @param[in] stream + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream __dparm(0)); + +/** + * @brief Query memory info. + * Return snapshot of free memory, and total allocatable memory on the device. + * + * Returns in *free a snapshot of the current free memory. 
+ * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * @warning On HCC, the free memory only accounts for memory allocated by this process and may be + *optimistic. + **/ +hipError_t hipMemGetInfo(size_t* free, size_t* total); + + +hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); + + +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] width Requested array allocation width + * @param[in] height Requested array allocation height + * @param[in] flags Requested properties of allocated array + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, + size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); +hipError_t hipArrayCreate(hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); + +hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); + +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); + +/** + * @brief Frees an array on the device. + * + * @param[in] array Pointer to array to free + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree + */ +hipError_t hipFreeArray(hipArray* array); + +/** + * @brief Allocate an array on the device. 
+ * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ + +hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); + +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind); + +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source memory address + * @param[in] woffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] count Size in bytes to copy + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind); + +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] srcoffset Offset in bytes of source array + * @param[in] count Size of memory copy in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count); + +/** + * @brief Copies data between host and device. + * + * @param[in] dstArray Destination memory address + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] count Size of memory copy in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count); + +/** + * @brief Copies data between host and device. + * + * @param[in] p 3D memory copy parameters + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); + +// doxygen end Memory +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup PeerToPeer Device Memory Access + * @{ + * + * @warning PeerToPeer support is experimental. 
+ * + */ + +/** + * @brief Determine if a device can access a peer's memory. + * + * @param [out] canAccessPeer Returns the peer access capability (0 or 1) + * @param [in] deviceId - device from where memory may be accessed. + * @param [in] peerDeviceId - device where memory is physically located + * + * Returns "1" in @p canAccessPeer if the specified @p deviceId is capable + * of directly accessing memory physically located on @p peerDeviceId, or "0" if not. + * + * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a + * device is not a peer of itself. + * + * @returns #hipSuccess, + * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices + */ +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); + + +/** + * @brief Enable direct access from current device's virtual address space to memory allocations + * physically located on a peer device. + * + * Memory which is already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerDeviceId + * @param [in] flags + * + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); + + +/** + * @brief Disable direct access from current device's virtual address space to memory allocations + * physically located on a peer device. + * + * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDeviceId has not yet been + * enabled from the current device. 
+ * + * @param [in] peerDeviceId + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + */ +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); + +/** + * @brief Get information on memory allocations. + * + * @param [out] pbase - Base pointer address + * @param [out] psize - Size of allocation + * @param [in] dptr - Device Pointer + * + * @returns #hipSuccess, #hipErrorInvalidDevicePointer + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); + +#ifndef USE_PEER_NON_UNIFIED +#define USE_PEER_NON_UNIFIED 1 +#endif + +#if USE_PEER_NON_UNIFIED == 1 +/** + * @brief Copies memory from one device to memory on another device. + * + * @param [out] dst - Destination device pointer. + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDeviceId - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); + +/** + * @brief Copies memory from one device to memory on another device. + * + * @param [out] dst - Destination device pointer. 
+ * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDevice - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * @param [in] stream - Stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream __dparm(0)); +#endif + + +// doxygen end PeerToPeer +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Driver Initialization and Version + * @{ + * + */ + +/** + * @brief Explicitly initializes the HIP runtime. + * + * Most HIP APIs implicitly initialize the HIP runtime. + * This API provides control over the timing of the initialization. + */ +// TODO-ctx - more description on error codes. +hipError_t hipInit(unsigned int flags); + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Context Management + * @{ + */ + +/** + * @brief Create a context and set it as current/ default context + * + * @param [out] ctx + * @param [in] flags + * @param [in] device associated device handle + * + * @return #hipSuccess + * + * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, + * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); + +/** + * @brief Destroy a HIP context.
+ * + * @param [in] ctx Context to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxDestroy(hipCtx_t ctx); + +/** + * @brief Pop the current/default context and return the popped context. + * + * @param [out] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxPopCurrent(hipCtx_t* ctx); + +/** + * @brief Push the context to be set as current/ default context + * + * @param [in] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxPushCurrent(hipCtx_t ctx); + +/** + * @brief Set the passed context as current/default + * + * @param [in] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetCurrent(hipCtx_t ctx); + +/** + * @brief Get the handle of the current/ default context + * + * @param [out] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetCurrent(hipCtx_t* ctx); + +/** + * @brief Get the handle of the device 
associated with current/default context + * + * @param [out] device + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + */ + +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetDevice(hipDevice_t* device); + +/** + * @brief Returns the approximate HIP api version. + * + * @param [in] ctx Context to check + * @param [out] apiVersion + * + * @return #hipSuccess + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. + * This function always sets *apiVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the api revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion); + +/** + * @brief Get the cache configuration + * + * @param [out] cacheConfig + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); + +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.
This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); + +/** + * @brief Set Shared memory bank configuration. + * + * @param [in] sharedMemoryConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); + +/** + * @brief Get Shared memory bank configuration. + * + * @param [out] sharedMemoryConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); + +/** + * @brief Blocks until the default context has completed all preceding requested tasks. + * + * @return #hipSuccess + * + * @warning This function waits for all streams on the default context to complete execution, and + * then returns. 
+ * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSynchronize(void); + +/** + * @brief Return flags used for creating default context. + * + * @param [out] flags + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetFlags(unsigned int* flags); + +/** + * @brief Enables direct access to memory allocations in a peer context. + * + * Memory which already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerCtx + * @param [in] flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * #hipErrorPeerAccessAlreadyEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); + +/** + * @brief Disable direct access from current context's virtual address space to memory allocations + * physically located on a peer context.Disables direct access to memory allocations in a peer + * context and unregisters any registered allocations. 
+ * + * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device. + * + * @param [in] peerCtx + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); + +/** + * @brief Get the state of the primary context. + * + * @param [in] Device to get primary context flags for + * @param [out] Pointer to store flags + * @param [out] Pointer to store context state; 0 = inactive, 1 = active + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); + +/** + * @brief Release the primary context on the GPU. + * + * @param [in] Device which primary context is released + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on + * HIP/HCC path. + */ +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); + +/** + * @brief Retain the primary context on the GPU. 
+ * + * @param [out] pctx Returned context handle of the new context + * @param [in] dev Device whose primary context is retained + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); + +/** + * @brief Resets the primary context on the GPU. + * + * @param [in] dev Device whose primary context is reset + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); + +/** + * @brief Set flags for the primary context. + * + * @param [in] dev Device for which the primary context flags are set + * @param [in] flags New flags for the device + * + * @returns #hipSuccess, #hipErrorContextAlreadyInUse + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); + +// doxygen end Context Management +/** + * @} + */ + +/** + * @brief Returns a handle to a compute device + * @param [out] device + * @param [in] ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); + +/** + * @brief Returns the compute capability of the device + * @param [out] major + * @param [out] minor + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); + +/** + * @brief Returns an identifier string for the device.
+ * @param [out] name + * @param [in] len + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); + +/** + * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. + * @param [out] pciBusId + * @param [in] len + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); + + +/** + * @brief Returns a handle to a compute device. + * @param [out] device handle + * @param [in] pciBusId PCI Bus ID + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); + + +/** + * @brief Returns the total amount of memory on the device. + * @param [out] bytes + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); + +/** + * @brief Returns the approximate HIP driver version. + * + * @param [out] driverVersion + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision. + * This function always sets *driverVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the driver revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipRuntimeGetVersion + */ +hipError_t hipDriverGetVersion(int* driverVersion); + +/** + * @brief Returns the approximate HIP Runtime version. + * + * @param [out] runtimeVersion + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning On HIP/HCC path this function returns HIP runtime patch version however on + * HIP/NVCC path this function returns the CUDA runtime version.
+ * + * @see hipDriverGetVersion + */ +hipError_t hipRuntimeGetVersion(int* runtimeVersion); + +/** + * @brief Loads code object from file into a hipModule_t + * + * @param [in] fname + * @param [out] module + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, + * hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized + * + * + */ +hipError_t hipModuleLoad(hipModule_t* module, const char* fname); + +/** + * @brief Frees the module + * + * @param [in] module + * + * @returns hipSuccess, hipErrorInvalidValue + * module is freed and the code objects associated with it are destroyed + * + */ + +hipError_t hipModuleUnload(hipModule_t module); + +/** + * @brief Function with kname will be extracted if present in module + * + * @param [in] module + * @param [in] kname + * @param [out] function + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, + * hipErrorNotFound, + */ +hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); + +/** + * @brief Find out attributes for a given function.
+ * + * @param [out] attr + * @param [in] func + * + * @returns hipSuccess, hipErrorInvalidDeviceFunction + */ + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); + +/** + * @brief returns device memory pointer and size of the kernel present in the module with symbol @p + * name + * + * @param [out] dptr + * @param [out] bytes + * @param [in] hmod + * @param [in] name + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized + */ +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. + * + * @param [in] image + * @param [out] module + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized + */ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image); + +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. Options are not used. hipModuleLoadData is called. + * + * @param [in] image + * @param [out] module + * @param [in] number of options + * @param [in] options for JIT + * @param [in] option values for JIT + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized + */ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionValues); + +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. 
+ * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimensions specified in work-items + * @param [in] blockDimY Y block dimension specified in work-items + * @param [in] blockDimZ Z block dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * @param [in] kernelParams + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please + * refer to hip_porting_driver_api.md for sample usage. + */ +hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra); + +// doxygen end Version Management +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Profiler Control + * @{ + * + * + * @warning The cudaProfilerInitialize API format for "configFile" is not supported. + * + */ + + +// TODO - expand descriptions: +/** + * @brief Start recording of profiling information + * When using this API, start the profiler with profiling disabled.
(--startdisabled) + * @warning : hipProfilerStart API is under development. + */ +hipError_t hipProfilerStart(); + + +/** + * @brief Stop recording of profiling information. + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @warning : hipProfilerStop API is under development. + */ +hipError_t hipProfilerStop(); + + +/** + * @} + */ + +// TODO: implement IPC apis + +/** + * @brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with hipMalloc and exports it for use in another process. This is a + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with hipFree and a subsequent call + * to hipMalloc returns memory with the same device address, + * hipIpcGetMemHandle will return a unique handle for the + * new memory. + * + * @param handle - Pointer to user allocated hipIpcMemHandle to return + * the handle in. + * @param devPtr - Base pointer to previously allocated device memory + * + * @returns + * hipSuccess, + * hipErrorInvalidResourceHandle, + * hipErrorMemoryAllocation, + * hipErrorMapBufferObjectFailed, + * + */ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); + +/** + * @brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with hipIpcGetMemHandle into + * the current device address space. For contexts on different devices + * hipIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called hipDeviceEnablePeerAccess. This behavior is + * controlled by the hipIpcMemLazyEnablePeerAccess flag. + * hipDeviceCanAccessPeer can determine if a mapping is possible. 
+ * + * Contexts that may open hipIpcMemHandles are restricted in the following way. + * hipIpcMemHandles from each device in a given process may only be opened + * by one context per device per other process. + * + * Memory returned from hipIpcOpenMemHandle must be freed with + * hipIpcCloseMemHandle. + * + * Calling hipFree on an exported memory region before calling + * hipIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * @param devPtr - Returned device pointer + * @param handle - hipIpcMemHandle to open + * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess + * + * @returns + * hipSuccess, + * hipErrorMapBufferObjectFailed, + * hipErrorInvalidResourceHandle, + * hipErrorTooManyPeers + * + * @note No guarantees are made about the address returned in @p *devPtr. + * In particular, multiple processes may not receive the same address for the same @p handle. + * + */ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); + +/** + * @brief Close memory mapped with hipIpcOpenMemHandle + * + * Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. 
+ * + * @param devPtr - Device pointer returned by hipIpcOpenMemHandle + * + * @returns + * hipSuccess, + * hipErrorMapBufferObjectFailed, + * hipErrorInvalidResourceHandle, + * + */ +hipError_t hipIpcCloseMemHandle(void* devPtr); + + +// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); +// hipError_t hipIpcCloseMemHandle(void *devPtr); +// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); +// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Clang Launch API to support the triple-chevron syntax + * @{ + */ + +/** + * @brief Configure a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0)); + + +/** + * @brief Set a kernel argument. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @param [in] arg Pointer the argument in host memory. + * @param [in] size Size of the argument. + * @param [in] offset Offset of the argument on the argument stack. + * + */ +hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); + + +/** + * @brief Launch a kernel. 
+ * + * @param [in] func Kernel to launch. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t hipLaunchByPtr(const void* func); + + + +/** + * @} + */ + + +#ifdef __cplusplus +} /* extern "c" */ +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif +/** + * Callback/Activity API + */ +hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg); +hipError_t hipRemoveApiCallback(uint32_t id); +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg); +hipError_t hipRemoveActivityCallback(uint32_t id); +static inline const char* hipApiName(const uint32_t& id) { return hip_api_name(id); } +const char* hipKernelNameRef(hipFunction_t f); +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#ifdef __cplusplus + +hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size = UINT_MAX); + +hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, + const void* devPtr, const struct hipChannelFormatDesc* desc, + size_t size, textureReference* tex); + +/* + * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture + *reference tex. + * + * @p desc describes how the memory is interpreted when fetching values from the texture. The @p + *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any + *memory previously bound to tex is unbound. 
+ * + * @param[in] offset - Offset in bytes + * @param[out] tex - texture to bind + * @param[in] devPtr - Memory area on device + * @param[in] desc - Channel format + * @param[in] size - Size of the memory area pointed to by devPtr + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + **/ +template +hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, + const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { + return ihipBindTextureImpl(dim, readMode, offset, devPtr, &desc, size, &tex); +} + +/* + * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture + *reference tex. + * + * @p desc describes how the memory is interpreted when fetching values from the texture. The @p + *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any + *memory previously bound to tex is unbound. + * + * @param[in] offset - Offset in bytes + * @param[in] tex - texture to bind + * @param[in] devPtr - Memory area on device + * @param[in] size - Size of the memory area pointed to by devPtr + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + **/ +template +hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, + size_t size = UINT_MAX) { + return ihipBindTextureImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); +} + +// C API +hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch); + +hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, + const void* devPtr, const struct hipChannelFormatDesc* desc, + size_t width, size_t height, textureReference* tex); + +template +hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, + const void* devPtr, size_t width, size_t height, size_t pitch) { + return 
ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height, + &tex); +} + +template +hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, + size_t width, size_t height, size_t pitch) { + return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex); +} + +// C API +hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc); + +hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc, + textureReference* tex); + +template +hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { + return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, &tex); +} + +template +hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, + const struct hipChannelFormatDesc& desc) { + return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex); +} + +template +inline static hipError_t hipBindTextureToArray(struct texture *tex, + hipArray_const_t array, + const struct hipChannelFormatDesc* desc) { + return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex); +} + +// C API +hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +template +hipError_t hipBindTextureToMipmappedArray(const texture& tex, + hipMipmappedArray_const_t mipmappedArray) { + return hipSuccess; +} + +template +hipError_t hipBindTextureToMipmappedArray(const texture& tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc& desc) { + return hipSuccess; +} + +/* + * @brief Unbinds the textuer bound to @p tex + * + * @param[in] tex - texture to unbind + * + * @return #hipSuccess + **/ +hipError_t hipUnbindTexture(const textureReference* tex); + +extern 
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); + +template +hipError_t hipUnbindTexture(struct texture& tex) { + return ihipUnbindTextureImpl(tex.textureObject); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc); + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags); + +hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am); + +hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm); + +hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags); + +hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents); + +hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, + size_t size); + +hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t devPtr, size_t pitch); + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); + +// doxygen end Texture +/** + * @} + */ + + +#endif + + +/** + 
*------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup HCC_Specific HCC-Specific Accessors + * @{ + * + * The following calls are only supported when compiling HIP with HCC. + * To produce portable code, use of these calls must be guarded by #ifdef checks: + * @code + * #ifdef __HCC__ + * hc::accelerator acc; + * hipError_t err = hipHccGetAccelerator(deviceId, &acc); + * #endif + * @endcode + * + */ + +// end-group HCC_Specific +/** + * @} + */ + + +// doxygen end HIP API +/** + * @} + */ + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h b/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h new file mode 100644 index 000000000..f74c01d70 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h @@ -0,0 +1,54 @@ +/* +Copyright (c) 2015- present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_surface_types.h + * @brief Defines surface types for HIP runtime. + */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H + +#include + +/** + * An opaque value that represents a hip surface object + */ +typedef unsigned long long hipSurfaceObject_t; + +/** + * hip surface reference + */ +struct surfaceReference { + hipSurfaceObject_t surfaceObject; +}; + +/** + * hip surface boundary modes + */ +enum hipSurfaceBoundaryMode { + hipBoundaryModeZero = 0, + hipBoundaryModeTrap = 1, + hipBoundaryModeClamp = 2 +}; + +#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h b/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h new file mode 100644 index 000000000..0a68b507e --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h @@ -0,0 +1,77 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_texture_types.h + * @brief Defines the texture types for the HIP runtime. + */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H + +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ +#include +//#include +#include +#include + +#if __cplusplus + +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ + +template +struct texture : public textureReference { + texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint, + enum hipTextureAddressMode aMode = hipAddressModeClamp) { + normalized = norm; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = hipCreateChannelDesc(); + sRGB = 0; + } + + texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, + struct hipChannelFormatDesc desc) { + normalized = norm; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = desc; + sRGB = 0; + } +}; + +#endif /* __cplusplus */ + +#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h 
b/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h new file mode 100644 index 000000000..1df6385fa --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h @@ -0,0 +1,880 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_vector_types.h + * @brief Defines the different vector types for the HIP runtime. + */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H + +#if defined(__HCC__) && (__hcc_workweek__ < 16032) +#error("This version of HIP requires a newer version of HCC."); +#endif + +#include "hip/hcc_detail/host_defines.h" + +#if !defined(_MSC_VER) || __clang__ +#if defined(__clang__) + #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) +#elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. 
+ #define __ROUND_UP_TO_NEXT_POT__(x) \ + (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x)))))) + #define __NATIVE_VECTOR__(n, T) \ + __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) +#endif + +#if defined(__cplusplus) + #include + + template struct HIP_vector_base; + + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(1, T); + + union { + Native_vec_ data; + struct { + T x; + }; + }; + }; + + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(2, T); + + union { + Native_vec_ data; + struct { + T x; + T y; + }; + }; + }; + + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(3, T); + + union { + Native_vec_ data; + struct { + T x; + T y; + T z; + }; + }; + }; + + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(4, T); + + union { + Native_vec_ data; + struct { + T x; + T y; + T z; + T w; + }; + }; + }; + + template + struct HIP_vector_type : public HIP_vector_base { + using HIP_vector_base::data; + using typename HIP_vector_base::Native_vec_; + + __host__ __device__ + HIP_vector_type() = default; + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type(U x) noexcept + { + for (auto i = 0u; i != rank; ++i) data[i] = x; + } + template< // TODO: constrain based on type as well. + typename... Us, + typename std::enable_if< + (rank > 1) && sizeof...(Us) == rank>::type* = nullptr> + __host__ __device__ + HIP_vector_type(Us... 
xs) noexcept { data = Native_vec_{static_cast(xs)...}; } + __host__ __device__ + HIP_vector_type(const HIP_vector_type&) = default; + __host__ __device__ + HIP_vector_type(HIP_vector_type&&) = default; + __host__ __device__ + ~HIP_vector_type() = default; + + __host__ __device__ + HIP_vector_type& operator=(const HIP_vector_type&) = default; + __host__ __device__ + HIP_vector_type& operator=(HIP_vector_type&&) = default; + + // Operators + __host__ __device__ + HIP_vector_type& operator++() noexcept + { + return *this += HIP_vector_type{1}; + } + __host__ __device__ + HIP_vector_type operator++(int) noexcept + { + auto tmp(*this); + ++*this; + return tmp; + } + __host__ __device__ + HIP_vector_type& operator--() noexcept + { + return *this -= HIP_vector_type{1}; + } + __host__ __device__ + HIP_vector_type operator--(int) noexcept + { + auto tmp(*this); + --*this; + return tmp; + } + __host__ __device__ + HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept + { + data += x.data; + return *this; + } + __host__ __device__ + HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept + { + data -= x.data; + return *this; + } + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator-=(U x) noexcept + { + return *this -= HIP_vector_type{x}; + } + __host__ __device__ + HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept + { + data *= x.data; + return *this; + } + __host__ __device__ + HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept + { + data /= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type operator-() noexcept + { + auto tmp(*this); + tmp.data = -tmp.data; + return tmp; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type operator~() noexcept + { + HIP_vector_type r{*this}; + 
r.data = ~r.data; + return r; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept + { + data %= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept + { + data ^= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept + { + data |= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept + { + data &= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept + { + data >>= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept + { + data <<= x.data; + return *this; + } + }; + + + template + __host__ __device__ + inline + HIP_vector_type operator+( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} += y; + } + template + __host__ __device__ + inline + HIP_vector_type operator+( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} += y; + } + template + __host__ __device__ + inline + HIP_vector_type operator+( + U x, const HIP_vector_type& y) noexcept + { + return y + x; + } + + template + __host__ __device__ + inline + HIP_vector_type operator-( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator-( 
+ const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} -= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator-( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } + + template + __host__ __device__ + inline + HIP_vector_type operator*( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} *= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator*( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} *= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator*( + U x, const HIP_vector_type& y) noexcept + { + return y * x; + } + + template + __host__ __device__ + inline + HIP_vector_type operator/( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} /= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator/( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} /= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator/( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} /= y; + } + + template + __host__ __device__ + inline + bool operator==( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + auto tmp = x.data == y.data; + for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; + return true; + } + template + __host__ __device__ + inline + bool operator==(const HIP_vector_type& x, U y) noexcept + { + return x == HIP_vector_type{y}; + } + template + __host__ __device__ + inline + bool operator==(U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} == y; + } + + template + __host__ __device__ + inline + bool operator!=( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return !(x == y); + } + template + __host__ __device__ + inline + bool operator!=(const HIP_vector_type& x, U y) noexcept + { + return !(x == 
y); + } + template + __host__ __device__ + inline + bool operator!=(U x, const HIP_vector_type& y) noexcept + { + return !(x == y); + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} %= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} ^= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} |= y; + } + template< + typename T, + unsigned int n, + typename U, + typename 
std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} &= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} &= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} &= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} >>= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator<<( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator<<( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} <<= y; + } + template< + typename T, + unsigned int 
n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator<<( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; + } + + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + using CUDA_name##1 = HIP_vector_type;\ + using CUDA_name##2 = HIP_vector_type;\ + using CUDA_name##3 = HIP_vector_type;\ + using CUDA_name##4 = HIP_vector_type; +#else + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\ + typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\ + typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\ + typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\ + typedef struct {\ + union {\ + CUDA_name##_impl1 data;\ + struct {\ + T x;\ + };\ + };\ + } CUDA_name##1;\ + typedef struct {\ + union {\ + CUDA_name##_impl2 data;\ + struct {\ + T x;\ + T y;\ + };\ + };\ + } CUDA_name##2;\ + typedef struct {\ + union {\ + CUDA_name##_impl3 data;\ + struct {\ + T x;\ + T y;\ + T z;\ + };\ + };\ + } CUDA_name##3;\ + typedef struct {\ + union {\ + CUDA_name##_impl4 data;\ + struct {\ + T x;\ + T y;\ + T z;\ + T w;\ + };\ + };\ + } CUDA_name##4; +#endif + +__MAKE_VECTOR_TYPE__(uchar, unsigned char); +__MAKE_VECTOR_TYPE__(char, char); +__MAKE_VECTOR_TYPE__(ushort, unsigned short); +__MAKE_VECTOR_TYPE__(short, short); +__MAKE_VECTOR_TYPE__(uint, unsigned int); +__MAKE_VECTOR_TYPE__(int, int); +__MAKE_VECTOR_TYPE__(ulong, unsigned long); +__MAKE_VECTOR_TYPE__(long, long); +__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long); +__MAKE_VECTOR_TYPE__(longlong, long long); +__MAKE_VECTOR_TYPE__(float, float); +__MAKE_VECTOR_TYPE__(double, double); + +#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x) { type r = {x}; return r; } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y) { type r = {x, y}; return r; } + +#define 
DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y, comp z) { type r = {x, y, z}; return r; } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y, comp z, comp w) { \ + type r = {x, y, z, w}; \ + return r; \ + } + +DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); +DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2); +DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4); + +DECLOP_MAKE_ONE_COMPONENT(signed char, char1); +DECLOP_MAKE_TWO_COMPONENT(signed char, char2); +DECLOP_MAKE_THREE_COMPONENT(signed char, char3); +DECLOP_MAKE_FOUR_COMPONENT(signed char, char4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1); +DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2); +DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4); + +DECLOP_MAKE_ONE_COMPONENT(signed short, short1); +DECLOP_MAKE_TWO_COMPONENT(signed short, short2); +DECLOP_MAKE_THREE_COMPONENT(signed short, short3); +DECLOP_MAKE_FOUR_COMPONENT(signed short, short4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1); +DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2); +DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4); + +DECLOP_MAKE_ONE_COMPONENT(signed int, int1); +DECLOP_MAKE_TWO_COMPONENT(signed int, int2); +DECLOP_MAKE_THREE_COMPONENT(signed int, int3); +DECLOP_MAKE_FOUR_COMPONENT(signed int, int4); + +DECLOP_MAKE_ONE_COMPONENT(float, float1); +DECLOP_MAKE_TWO_COMPONENT(float, float2); +DECLOP_MAKE_THREE_COMPONENT(float, float3); +DECLOP_MAKE_FOUR_COMPONENT(float, float4); + +DECLOP_MAKE_ONE_COMPONENT(double, double1); +DECLOP_MAKE_TWO_COMPONENT(double, double2); +DECLOP_MAKE_THREE_COMPONENT(double, double3); +DECLOP_MAKE_FOUR_COMPONENT(double, double4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1); 
+DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long, long1); +DECLOP_MAKE_TWO_COMPONENT(signed long, long2); +DECLOP_MAKE_THREE_COMPONENT(signed long, long3); +DECLOP_MAKE_FOUR_COMPONENT(signed long, long4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1); +DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); +DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); +DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); +DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); +#else // defined(_MSC_VER) +#include +#include +#include +#include + +typedef union { char data; } char1; +typedef union { char data[2]; } char2; +typedef union { char data[4]; } char4; +typedef union { char4 data; } char3; +typedef union { __m64 data; } char8; +typedef union { __m128i data; } char16; + +typedef union { unsigned char data; } uchar1; +typedef union { unsigned char data[2]; } uchar2; +typedef union { unsigned char data[4]; } uchar4; +typedef union { uchar4 data; } uchar3; +typedef union { __m64 data; } uchar8; +typedef union { __m128i data; } uchar16; + +typedef union { short data; } short1; +typedef union { short data[2]; } short2; +typedef union { __m64 data; } short4; +typedef union { short4 data; } short3; +typedef union { __m128i data; } short8; +typedef union { __m128i data[2]; } short16; + +typedef union { unsigned short data; } ushort1; +typedef union { unsigned short data[2]; } ushort2; +typedef union { __m64 data; } ushort4; +typedef union { ushort4 data; } ushort3; +typedef union { __m128i data; } ushort8; +typedef union { __m128i data[2]; } ushort16; + +typedef union { int data; } int1; +typedef union { __m64 data; } int2; 
+typedef union { __m128i data; } int4; +typedef union { int4 data; } int3; +typedef union { __m128i data[2]; } int8; +typedef union { __m128i data[4];} int16; + +typedef union { unsigned int data; } uint1; +typedef union { __m64 data; } uint2; +typedef union { __m128i data; } uint4; +typedef union { uint4 data; } uint3; +typedef union { __m128i data[2]; } uint8; +typedef union { __m128i data[4]; } uint16; + +#if !defined(_WIN64) +typedef union { int data; } long1; +typedef union { __m64 data; } long2; +typedef union { __m128i data; } long4; +typedef union { long4 data; } long3; +typedef union { __m128i data[2]; } long8; +typedef union { __m128i data[4]; } long16; + +typedef union { unsigned int data; } ulong1; +typedef union { __m64 data; } ulong2; +typedef union { __m128i data; } ulong4; +typedef union { ulong4 data; } ulong3; +typedef union { __m128i data[2]; } ulong8; +typedef union { __m128i data[4]; } ulong16; +#else // defined(_WIN64) +typedef union { __m64 data; } long1; +typedef union { __m128i data; } long2; +typedef union { __m128i data[2]; } long4; +typedef union { long4 data; } long3; +typedef union { __m128i data[4]; } long8; +typedef union { __m128i data[8]; } long16; + +typedef union { __m64 data; } ulong1; +typedef union { __m128i data; } ulong2; +typedef union { __m128i data[2]; } ulong4; +typedef union { ulong4 data; } ulong3; +typedef union { __m128i data[4]; } ulong8; +typedef union { __m128i data[8]; } ulong16; +#endif // defined(_WIN64) + +typedef union { __m64 data; } longlong1; +typedef union { __m128i data; } longlong2; +typedef union { __m128i data[2]; } longlong4; +typedef union { longlong4 data; } longlong3; +typedef union { __m128i data[4]; } longlong8; +typedef union { __m128i data[8]; } longlong16; + +typedef union { __m64 data; } ulonglong1; +typedef union { __m128i data; } ulonglong2; +typedef union { __m128i data[2]; } ulonglong4; +typedef union { ulonglong4 data; } ulonglong3; +typedef union { __m128i data[4]; } ulonglong8; 
+typedef union { __m128i data[8]; } ulonglong16; + +typedef union { float data; } float1; +typedef union { __m64 data; } float2; +typedef union { __m128 data; } float4; +typedef union { float4 data; } float3; +typedef union { __m256 data; } float8; +typedef union { __m256 data[2]; } float16; + +typedef union { double data; } double1; +typedef union { __m128d data; } double2; +typedef union { __m256d data; } double4; +typedef union { double4 data; } double3; +typedef union { __m256d data[2]; } double8; +typedef union { __m256d data[4]; } double16; + +#endif // defined(_MSC_VER) +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/host_defines.h b/src/utils/amd_hip/hip/hcc_detail/host_defines.h new file mode 100644 index 000000000..5d1c3d8f6 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/host_defines.h @@ -0,0 +1,95 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @file hcc_detail/host_defines.h + * @brief TODO-doc + */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H + + +// Add guard to Generic Grid Launch method +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif + +#ifdef __HCC__ +/** + * Function and kernel markers + */ +#define __host__ __attribute__((cpu)) +#define __device__ __attribute__((hc)) + +#if GENERIC_GRID_LAUNCH == 0 +#define __global__ __attribute__((hc_grid_launch)) __attribute__((used)) +#else +#if __hcc_workweek__ >= 17481 +#define __global__ __attribute__((annotate("__HIP_global_function__"), cpu, hc, used)) +#else +#define __global__ __attribute__((hc, used)) +#endif +#endif // GENERIC_GRID_LAUNCH + +#define __noinline__ __attribute__((noinline)) +#define __forceinline__ inline __attribute__((always_inline)) + + +/* + * Variable Type Qualifiers: + */ +// _restrict is supported by the compiler +#define __shared__ tile_static +#define __constant__ __attribute__((hc)) + +#elif defined(__clang__) && defined(__HIP__) + +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) + +#define __noinline__ __attribute__((noinline)) +#define __forceinline__ inline __attribute__((always_inline)) + +#else + +// Non-HCC compiler +/** + * Function and kernel markers + */ +#define __host__ +#define __device__ + +#define __global__ + +#define __noinline__ +#define __forceinline__ + +#define __shared__ +#define __constant__ + +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h b/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h new file mode 100644 index 000000000..dc6fd05c5 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/llvm_intrinsics.h + * @brief Contains declarations for wrapper functions for llvm intrinsics + * like llvm.amdgcn.s.barrier. 
+ */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H + +#include "hip/hcc_detail/host_defines.h" + +__device__ +__attribute__((convergent)) +ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); + +__device__ +unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); + +__device__ +unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32"); + +__device__ +uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64"); + +extern +__device__ +__attribute__((const)) +unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); + +extern +__device__ +__attribute__((const)) +unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); + +__device__ +int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute"); + +__device__ +int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute"); + +__device__ +int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle"); + +__device__ +int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, + bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32"); + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp new file mode 100644 index 000000000..8726b60dc --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp @@ -0,0 +1,798 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "concepts.hpp" +#include "helpers.hpp" + +#include "hc.hpp" +#include "hip/hip_hcc.h" +#include "hip_runtime.h" + +#include +#include +#include +#include +#include + +namespace hip_impl { +namespace { +struct New_grid_launch_tag {}; +struct Old_grid_launch_tag {}; + +template +class RAII_guard { + D dtor_; + + public: + RAII_guard() = default; + + RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} { ctor(); } + + RAII_guard(const RAII_guard&) = default; + RAII_guard(RAII_guard&&) = default; + + RAII_guard& operator=(const RAII_guard&) = default; + RAII_guard& operator=(RAII_guard&&) = default; + + ~RAII_guard() { dtor_(); } +}; + +template +RAII_guard make_RAII_guard(const C& ctor, D dtor) { + return RAII_guard{ctor, std::move(dtor)}; +} + +template +using is_new_grid_launch_t = typename std::conditional{}, New_grid_launch_tag, + Old_grid_launch_tag>::type; +} // namespace + +// TODO: - dispatch rank should be derived from the domain dimensions passed +// in, and not always assumed to be 3; + +template +requires(Domain == + {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, dim3 num_blocks, + dim3 dim_blocks, int group_mem_bytes, + const hc::accelerator_view& acc_v, K k) { + const auto d = + hc::extent<3>{num_blocks.z * dim_blocks.z, num_blocks.y * dim_blocks.y, + num_blocks.x * dim_blocks.x} + .tile_with_dynamic(dim_blocks.z, dim_blocks.y, dim_blocks.x, group_mem_bytes); + + try { + hc::parallel_for_each(acc_v, d, k); + } catch (std::exception& ex) { + std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; + throw; + } +} + +// TODO: these are workarounds, they should be removed. 
+ +hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); +void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t); +void unlock_stream_hip_(hipStream_t, void*, const char*, hc::accelerator_view*); + +template +requires(Domain == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, + dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + const char* kernel_name, K k) { + void* lck_stream = nullptr; + auto acc_v = lock_stream_hip_(stream, lck_stream); + auto stream_guard = + make_RAII_guard(std::bind(print_prelaunch_trace_, kernel_name, num_blocks, dim_blocks, + group_mem_bytes, stream), + std::bind(unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); + + try { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, acc_v, std::move(k)); + } catch (std::exception& ex) { + std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; + throw; + } +} + +template +requires(Domain == + {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(Old_grid_launch_tag, + dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, K k) { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, std::move(stream), std::move(k)); +} + +template +requires(Domain == {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_( + Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, + const char* kernel_name, K k) { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, std::move(stream), kernel_name, std::move(k)); +} + +template +requires(Domain == {Ts...}) inline std::enable_if_t< + !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, hipStream_t stream, + const char* kernel_name, K k) { + 
grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), + std::move(dim_blocks), group_mem_bytes, std::move(stream), kernel_name, + std::move(k)); +} + +template +requires(Domain == {Ts...}) inline std::enable_if_t< + !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, hipStream_t stream, K k) { + grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), + std::move(dim_blocks), group_mem_bytes, std::move(stream), std::move(k)); +} + +// TODO: these are temporary and purposefully noisy and disruptive. +#define make_kernel_name_hip(k, n) \ + HIP_kernel_functor_name_begin##_##k##_##HIP_kernel_functor_name_end##_##n + +#define make_kernel_functor_hip_30(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25, p26, p27) \ + struct make_kernel_name_hip(function_name, 28) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + std::decay_t _p26_; \ + std::decay_t _p27_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_, _p26_, _p27_); \ + } \ + } +#define make_kernel_functor_hip_29(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ 
+ p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25, p26) \ + struct make_kernel_name_hip(function_name, 27) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + std::decay_t _p26_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_, _p26_); \ + } \ + } +#define make_kernel_functor_hip_28(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25) \ + struct make_kernel_name_hip(function_name, 26) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + 
kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_); \ + } \ + } +#define make_kernel_functor_hip_27(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24) \ + struct make_kernel_name_hip(function_name, 25) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_); \ + } \ + } +#define make_kernel_functor_hip_26(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23) \ + struct make_kernel_name_hip(function_name, 24) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t 
_p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_); \ + } \ + } +#define make_kernel_functor_hip_25(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22) \ + struct make_kernel_name_hip(function_name, 23) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + __attribute__((used, flatten)) void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_); \ + } \ + } +#define make_kernel_functor_hip_24(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21) \ + struct make_kernel_name_hip(function_name, 22) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + 
std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_); \ + } \ + } +#define make_kernel_functor_hip_23(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20) \ + struct make_kernel_name_hip(function_name, 21) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_); \ + } \ + } +#define make_kernel_functor_hip_22(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19) \ + struct make_kernel_name_hip(function_name, 20) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t 
_p18_; \ + std::decay_t _p19_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_); \ + } \ + } +#define make_kernel_functor_hip_21(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18) \ + struct make_kernel_name_hip(function_name, 19) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_); \ + } \ + } +#define make_kernel_functor_hip_20(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17) \ + struct make_kernel_name_hip(function_name, 18) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_); \ + } \ + } +#define 
make_kernel_functor_hip_19(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16) \ + struct make_kernel_name_hip(function_name, 17) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_); \ + } \ + } +#define make_kernel_functor_hip_18(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15) \ + struct make_kernel_name_hip(function_name, 16) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_); \ + } \ + } +#define make_kernel_functor_hip_17(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14) \ + struct make_kernel_name_hip(function_name, 15) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + 
std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_); \ + } \ + } +#define make_kernel_functor_hip_16(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13) \ + struct make_kernel_name_hip(function_name, 14) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_); \ + } \ + } +#define make_kernel_functor_hip_15(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12) \ + struct make_kernel_name_hip(function_name, 13) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_); \ + } \ + } +#define make_kernel_functor_hip_14(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11) \ + struct make_kernel_name_hip(function_name, 12) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t 
_p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_); \ + } \ + } +#define make_kernel_functor_hip_13(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10) \ + struct make_kernel_name_hip(function_name, 11) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_); \ + } \ + } +#define make_kernel_functor_hip_12(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9) \ + struct make_kernel_name_hip(function_name, 10) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); } \ + } +#define make_kernel_functor_hip_11(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \ + struct make_kernel_name_hip(function_name, 9) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); } \ + } +#define make_kernel_functor_hip_10(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ + struct 
make_kernel_name_hip(function_name, 8) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); } \ + } +#define make_kernel_functor_hip_9(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6) \ + struct make_kernel_name_hip(function_name, 7) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); } \ + } +#define make_kernel_functor_hip_8(function_name, kernel_name, p0, p1, p2, p3, p4, p5) \ + struct make_kernel_name_hip(function_name, 6) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); } \ + } +#define make_kernel_functor_hip_7(function_name, kernel_name, p0, p1, p2, p3, p4) \ + struct make_kernel_name_hip(function_name, 5) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); } \ + } +#define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3) \ + struct make_kernel_name_hip(function_name, 4) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_); } \ + } +#define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2) \ + struct 
make_kernel_name_hip(function_name, 3) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_, _p2_); } \ + } +#define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1) \ + struct make_kernel_name_hip(function_name, 2) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_); } \ + } +#define fofo(f, n) kernel_prefix_hip##f##kernel_suffix_hip##n +#define make_kernel_functor_hip_3(function_name, kernel_name, p0) \ + struct make_kernel_name_hip(function_name, 1) { \ + std::decay_t _p0_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_); } \ + } +#define make_kernel_functor_hip_2(function_name, kernel_name) \ + struct make_kernel_name_hip(function_name, 0) { \ + void operator()(const hc::tiled_index<3>&)[[hc]] { return kernel_name(hipLaunchParm{}); } \ + } +#define make_kernel_functor_hip_1(...) +#define make_kernel_functor_hip_0(...) +#define make_kernel_functor_hip_(...) overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__) + + +#define hipLaunchNamedKernelGGL(function_name, kernel_name, num_blocks, dim_blocks, \ + group_mem_bytes, stream, ...) \ + do { \ + make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__) \ + hip_kernel_functor_impl_{__VA_ARGS__}; \ + hip_impl::grid_launch_hip_(num_blocks, dim_blocks, group_mem_bytes, stream, #kernel_name, \ + hip_kernel_functor_impl_); \ + } while (0) + +#define hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) \ + do { \ + hipLaunchNamedKernelGGL(unnamed, kernel_name, num_blocks, dim_blocks, group_mem_bytes, \ + stream, ##__VA_ARGS__); \ + } while (0) + +#define hipLaunchKernel(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) 
\ + do { \ + hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, \ + hipLaunchParm{}, ##__VA_ARGS__); \ + } while (0) +} // namespace hip_impl \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/math_functions.h b/src/utils/amd_hip/hip/hcc_detail/math_functions.h new file mode 100644 index 000000000..8ac87425b --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/math_functions.h @@ -0,0 +1,1501 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "hip_fp16_math_fwd.h" +#include "hip_vector_types.h" +#include "math_fwd.h" + +#include + +#include +#include +#include +#include +#include + +// HCC's own math functions should be included first, otherwise there will +// be conflicts when hip/math_functions.h is included before hip/hip_runtime.h. 
+#ifdef __HCC__ +#include "kalmar_math.h" +#endif + +#pragma push_macro("__DEVICE__") +#pragma push_macro("__RETURN_TYPE") + +#ifdef __HCC__ +#define __DEVICE__ __device__ +#define __RETURN_TYPE int +#else // to be consistent with __clang_cuda_math_forward_declares +#define __DEVICE__ static __device__ +#define __RETURN_TYPE bool +#endif + +__DEVICE__ +inline +uint64_t __make_mantissa_base8(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0'; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa_base10(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0'; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa_base16(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0'; + else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10; + else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa(const char* tagp) +{ + if (!tagp) return 0u; + + if (*tagp == '0') { + ++tagp; + + if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp); + else return __make_mantissa_base8(tagp); + } + + return __make_mantissa_base10(tagp); +} + +// DOT FUNCTIONS +#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ +__DEVICE__ +inline +int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { + return __ockl_sdot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) { + return __ockl_udot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { + return __ockl_sdot4(a.data, b.data, c, saturate); +} +__DEVICE__ 
+inline +uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { + return __ockl_udot4(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(int a, int b, int c, bool saturate) { + return __ockl_sdot8(a, b, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { + return __ockl_udot8(a, b, c, saturate); +} +#endif + +// BEGIN FLOAT +__DEVICE__ +inline +float abs(float x) { return __ocml_fabs_f32(x); } +__DEVICE__ +inline +float acosf(float x) { return __ocml_acos_f32(x); } +__DEVICE__ +inline +float acoshf(float x) { return __ocml_acosh_f32(x); } +__DEVICE__ +inline +float asinf(float x) { return __ocml_asin_f32(x); } +__DEVICE__ +inline +float asinhf(float x) { return __ocml_asinh_f32(x); } +__DEVICE__ +inline +float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); } +__DEVICE__ +inline +float atanf(float x) { return __ocml_atan_f32(x); } +__DEVICE__ +inline +float atanhf(float x) { return __ocml_atanh_f32(x); } +__DEVICE__ +inline +float cbrtf(float x) { return __ocml_cbrt_f32(x); } +__DEVICE__ +inline +float ceilf(float x) { return __ocml_ceil_f32(x); } +__DEVICE__ +inline +float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); } +__DEVICE__ +inline +float cosf(float x) { return __ocml_cos_f32(x); } +__DEVICE__ +inline +float coshf(float x) { return __ocml_cosh_f32(x); } +__DEVICE__ +inline +float cospif(float x) { return __ocml_cospi_f32(x); } +__DEVICE__ +inline +float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); } +__DEVICE__ +inline +float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); } +__DEVICE__ +inline +float erfcf(float x) { return __ocml_erfc_f32(x); } +__DEVICE__ +inline +float erfcinvf(float x) { return __ocml_erfcinv_f32(x); } +__DEVICE__ +inline +float erfcxf(float x) { return __ocml_erfcx_f32(x); } +__DEVICE__ +inline +float erff(float x) { return __ocml_erf_f32(x); } +__DEVICE__ +inline +float erfinvf(float x) { return __ocml_erfinv_f32(x); } 
+__DEVICE__ +inline +float exp10f(float x) { return __ocml_exp10_f32(x); } +__DEVICE__ +inline +float exp2f(float x) { return __ocml_exp2_f32(x); } +__DEVICE__ +inline +float expf(float x) { return __ocml_exp_f32(x); } +__DEVICE__ +inline +float expm1f(float x) { return __ocml_expm1_f32(x); } +__DEVICE__ +inline +float fabsf(float x) { return __ocml_fabs_f32(x); } +__DEVICE__ +inline +float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); } +__DEVICE__ +inline +float fdividef(float x, float y) { return x / y; } +__DEVICE__ +inline +float floorf(float x) { return __ocml_floor_f32(x); } +__DEVICE__ +inline +float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); } +__DEVICE__ +inline +float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); } +__DEVICE__ +inline +float fminf(float x, float y) { return __ocml_fmin_f32(x, y); } +__DEVICE__ +inline +float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); } +__DEVICE__ +inline +float frexpf(float x, int* nptr) +{ + int tmp; + float r = + __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; + + return r; +} +__DEVICE__ +inline +float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); } +__DEVICE__ +inline +int ilogbf(float x) { return __ocml_ilogb_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); } +__DEVICE__ +inline +float j0f(float x) { return __ocml_j0_f32(x); } +__DEVICE__ +inline +float j1f(float x) { return __ocml_j1_f32(x); } +__DEVICE__ +inline +float jnf(int n, float x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. 
+ if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + float x0 = j0f(x); + float x1 = j1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__DEVICE__ +inline +float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); } +__DEVICE__ +inline +float lgammaf(float x) { return __ocml_lgamma_f32(x); } +__DEVICE__ +inline +long long int llrintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +long long int llroundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float log10f(float x) { return __ocml_log10_f32(x); } +__DEVICE__ +inline +float log1pf(float x) { return __ocml_log1p_f32(x); } +__DEVICE__ +inline +float log2f(float x) { return __ocml_log2_f32(x); } +__DEVICE__ +inline +float logbf(float x) { return __ocml_logb_f32(x); } +__DEVICE__ +inline +float logf(float x) { return __ocml_log_f32(x); } +__DEVICE__ +inline +long int lrintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +long int lroundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float modff(float x, float* iptr) +{ + float tmp; + float r = + __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *iptr = tmp; + + return r; +} +__DEVICE__ +inline +float nanf(const char* tagp) +{ + union { + float val; + struct ieee_float { + uint32_t mantissa : 22; + uint32_t quiet : 1; + uint32_t exponent : 8; + uint32_t sign : 1; + } bits; + + static_assert(sizeof(float) == sizeof(ieee_float), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +} +__DEVICE__ +inline +float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DEVICE__ +inline +float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); } +__DEVICE__ +inline +float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); } +__DEVICE__ +inline +float norm4df(float x, float y, 
float z, float w) +{ + return __ocml_len4_f32(x, y, z, w); +} +__DEVICE__ +inline +float normcdff(float x) { return __ocml_ncdf_f32(x); } +__DEVICE__ +inline +float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); } +__DEVICE__ +inline +float normf(int dim, const float* a) +{ // TODO: placeholder until OCML adds support. + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f32(r); +} +__DEVICE__ +inline +float powf(float x, float y) { return __ocml_pow_f32(x, y); } +__DEVICE__ +inline +float rcbrtf(float x) { return __ocml_rcbrt_f32(x); } +__DEVICE__ +inline +float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); } +__DEVICE__ +inline +float remquof(float x, float y, int* quo) +{ + int tmp; + float r = + __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__DEVICE__ +inline +float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); } +__DEVICE__ +inline +float rintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +float rnorm3df(float x, float y, float z) +{ + return __ocml_rlen3_f32(x, y, z); +} + +__DEVICE__ +inline +float rnorm4df(float x, float y, float z, float w) +{ + return __ocml_rlen4_f32(x, y, z, w); +} +__DEVICE__ +inline +float rnormf(int dim, const float* a) +{ // TODO: placeholder until OCML adds support. + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f32(r); +} +__DEVICE__ +inline +float roundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float rsqrtf(float x) { return __ocml_rsqrt_f32(x); } +__DEVICE__ +inline +float scalblnf(float x, long int n) +{ + return (n < INT_MAX) ? 
__ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n); +} +__DEVICE__ +inline +float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); } +__DEVICE__ +inline +__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); } +__DEVICE__ +inline +void sincosf(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +void sincospif(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +float sinf(float x) { return __ocml_sin_f32(x); } +__DEVICE__ +inline +float sinhf(float x) { return __ocml_sinh_f32(x); } +__DEVICE__ +inline +float sinpif(float x) { return __ocml_sinpi_f32(x); } +__DEVICE__ +inline +float sqrtf(float x) { return __ocml_sqrt_f32(x); } +__DEVICE__ +inline +float tanf(float x) { return __ocml_tan_f32(x); } +__DEVICE__ +inline +float tanhf(float x) { return __ocml_tanh_f32(x); } +__DEVICE__ +inline +float tgammaf(float x) { return __ocml_tgamma_f32(x); } +__DEVICE__ +inline +float truncf(float x) { return __ocml_trunc_f32(x); } +__DEVICE__ +inline +float y0f(float x) { return __ocml_y0_f32(x); } +__DEVICE__ +inline +float y1f(float x) { return __ocml_y1_f32(x); } +__DEVICE__ +inline +float ynf(int n, float x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. 
+ if (n == 0) return y0f(x); + if (n == 1) return y1f(x); + + float x0 = y0f(x); + float x1 = y1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +__DEVICE__ +inline +float __cosf(float x) { return __ocml_native_cos_f32(x); } +__DEVICE__ +inline +float __exp10f(float x) { return __ocml_native_exp10_f32(x); } +__DEVICE__ +inline +float __expf(float x) { return __ocml_native_exp_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fadd_rn(float x, float y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } +__DEVICE__ +inline +float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fdiv_rn(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#endif +__DEVICE__ +inline +float __fdividef(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_rd(float x, float y, float z) +{ + return __ocml_fma_rtn_f32(x, y, z); +} +#endif +__DEVICE__ +inline +float __fmaf_rn(float x, float y, float z) +{ + return __ocml_fma_f32(x, y, z); +} +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_ru(float x, float y, float z) +{ + return __ocml_fma_rtp_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rz(float x, float y, float z) +{ + return __ocml_fma_rtz_f32(x, y, z); +} +__DEVICE__ +inline +float __fmul_rd(float x, float y) { return 
__ocml_mul_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fmul_rn(float x, float y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +__DEVICE__ +inline +float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +__DEVICE__ +inline +float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } +#endif +__DEVICE__ +inline +float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); } +__DEVICE__ +inline +float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } +#endif +__DEVICE__ +inline +float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +#endif +__DEVICE__ +inline +float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } +__DEVICE__ +inline +float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } +__DEVICE__ +inline +float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fsub_rn(float x, float y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +__DEVICE__ +inline +float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#endif +__DEVICE__ +inline +float __log10f(float x) { return __ocml_native_log10_f32(x); } +__DEVICE__ +inline +float __log2f(float x) { return __ocml_native_log2_f32(x); } +__DEVICE__ +inline +float __logf(float x) { return __ocml_native_log_f32(x); } +__DEVICE__ +inline +float __powf(float x, float y) { return __ocml_pow_f32(x, y); } +__DEVICE__ +inline 
+float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); } +__DEVICE__ +inline +void __sincosf(float x, float* sptr, float* cptr) +{ + *sptr = __ocml_native_sin_f32(x); + *cptr = __ocml_native_cos_f32(x); +} +__DEVICE__ +inline +float __sinf(float x) { return __ocml_native_sin_f32(x); } +__DEVICE__ +inline +float __tanf(float x) { return __ocml_tan_f32(x); } +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__DEVICE__ +inline +double abs(double x) { return __ocml_fabs_f64(x); } +__DEVICE__ +inline +double acos(double x) { return __ocml_acos_f64(x); } +__DEVICE__ +inline +double acosh(double x) { return __ocml_acosh_f64(x); } +__DEVICE__ +inline +double asin(double x) { return __ocml_asin_f64(x); } +__DEVICE__ +inline +double asinh(double x) { return __ocml_asinh_f64(x); } +__DEVICE__ +inline +double atan(double x) { return __ocml_atan_f64(x); } +__DEVICE__ +inline +double atan2(double x, double y) { return __ocml_atan2_f64(x, y); } +__DEVICE__ +inline +double atanh(double x) { return __ocml_atanh_f64(x); } +__DEVICE__ +inline +double cbrt(double x) { return __ocml_cbrt_f64(x); } +__DEVICE__ +inline +double ceil(double x) { return __ocml_ceil_f64(x); } +__DEVICE__ +inline +double copysign(double x, double y) { return __ocml_copysign_f64(x, y); } +__DEVICE__ +inline +double cos(double x) { return __ocml_cos_f64(x); } +__DEVICE__ +inline +double cosh(double x) { return __ocml_cosh_f64(x); } +__DEVICE__ +inline +double cospi(double x) { return __ocml_cospi_f64(x); } +__DEVICE__ +inline +double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); } +__DEVICE__ +inline +double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); } +__DEVICE__ +inline +double erf(double x) { return __ocml_erf_f64(x); } +__DEVICE__ +inline +double erfc(double x) { return __ocml_erfc_f64(x); } +__DEVICE__ +inline +double erfcinv(double x) { return __ocml_erfcinv_f64(x); } +__DEVICE__ +inline +double erfcx(double x) { return __ocml_erfcx_f64(x); } +__DEVICE__ +inline +double 
erfinv(double x) { return __ocml_erfinv_f64(x); } +__DEVICE__ +inline +double exp(double x) { return __ocml_exp_f64(x); } +__DEVICE__ +inline +double exp10(double x) { return __ocml_exp10_f64(x); } +__DEVICE__ +inline +double exp2(double x) { return __ocml_exp2_f64(x); } +__DEVICE__ +inline +double expm1(double x) { return __ocml_expm1_f64(x); } +__DEVICE__ +inline +double fabs(double x) { return __ocml_fabs_f64(x); } +__DEVICE__ +inline +double fdim(double x, double y) { return __ocml_fdim_f64(x, y); } +__DEVICE__ +inline +double floor(double x) { return __ocml_floor_f64(x); } +__DEVICE__ +inline +double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); } +__DEVICE__ +inline +double fmax(double x, double y) { return __ocml_fmax_f64(x, y); } +__DEVICE__ +inline +double fmin(double x, double y) { return __ocml_fmin_f64(x, y); } +__DEVICE__ +inline +double fmod(double x, double y) { return __ocml_fmod_f64(x, y); } +__DEVICE__ +inline +double frexp(double x, int* nptr) +{ + int tmp; + double r = + __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; + + return r; +} +__DEVICE__ +inline +double hypot(double x, double y) { return __ocml_hypot_f64(x, y); } +__DEVICE__ +inline +int ilogb(double x) { return __ocml_ilogb_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); } +__DEVICE__ +inline +double j0(double x) { return __ocml_j0_f64(x); } +__DEVICE__ +inline +double j1(double x) { return __ocml_j1_f64(x); } +__DEVICE__ +inline +double jn(int n, double x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. 
+ if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + double x0 = j0f(x); + double x1 = j1f(x); + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__DEVICE__ +inline +double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); } +__DEVICE__ +inline +double lgamma(double x) { return __ocml_lgamma_f64(x); } +__DEVICE__ +inline +long long int llrint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +long long int llround(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double log(double x) { return __ocml_log_f64(x); } +__DEVICE__ +inline +double log10(double x) { return __ocml_log10_f64(x); } +__DEVICE__ +inline +double log1p(double x) { return __ocml_log1p_f64(x); } +__DEVICE__ +inline +double log2(double x) { return __ocml_log2_f64(x); } +__DEVICE__ +inline +double logb(double x) { return __ocml_logb_f64(x); } +__DEVICE__ +inline +long int lrint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +long int lround(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double modf(double x, double* iptr) +{ + double tmp; + double r = + __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *iptr = tmp; + + return r; +} +__DEVICE__ +inline +double nan(const char* tagp) +{ +#if !_WIN32 + union { + double val; + struct ieee_double { + uint64_t mantissa : 51; + uint32_t quiet : 1; + uint32_t exponent : 11; + uint32_t sign : 1; + } bits; + static_assert(sizeof(double) == sizeof(ieee_double), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +#else + uint64_t val = __make_mantissa(tagp); + val |= 0xFFF << 51; + return reinterpret_cast(val); +#endif +} +__DEVICE__ +inline +double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +__DEVICE__ +inline +double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); } +__DEVICE__ 
+inline +double norm(int dim, const double* a) +{ // TODO: placeholder until OCML adds support. + double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f64(r); +} +__DEVICE__ +inline +double norm3d(double x, double y, double z) +{ + return __ocml_len3_f64(x, y, z); +} +__DEVICE__ +inline +double norm4d(double x, double y, double z, double w) +{ + return __ocml_len4_f64(x, y, z, w); +} +__DEVICE__ +inline +double normcdf(double x) { return __ocml_ncdf_f64(x); } +__DEVICE__ +inline +double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); } +__DEVICE__ +inline +double pow(double x, double y) { return __ocml_pow_f64(x, y); } +__DEVICE__ +inline +double rcbrt(double x) { return __ocml_rcbrt_f64(x); } +__DEVICE__ +inline +double remainder(double x, double y) { return __ocml_remainder_f64(x, y); } +__DEVICE__ +inline +double remquo(double x, double y, int* quo) +{ + int tmp; + double r = + __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__DEVICE__ +inline +double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); } +__DEVICE__ +inline +double rint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +double rnorm(int dim, const double* a) +{ // TODO: placeholder until OCML adds support. + double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f64(r); +} +__DEVICE__ +inline +double rnorm3d(double x, double y, double z) +{ + return __ocml_rlen3_f64(x, y, z); +} +__DEVICE__ +inline +double rnorm4d(double x, double y, double z, double w) +{ + return __ocml_rlen4_f64(x, y, z, w); +} +__DEVICE__ +inline +double round(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double rsqrt(double x) { return __ocml_rsqrt_f64(x); } +__DEVICE__ +inline +double scalbln(double x, long int n) +{ + return (n < INT_MAX) ? 
__ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n); +} +__DEVICE__ +inline +double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); } +__DEVICE__ +inline +__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); } +__DEVICE__ +inline +double sin(double x) { return __ocml_sin_f64(x); } +__DEVICE__ +inline +void sincos(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = + __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +void sincospi(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = __ocml_sincospi_f64( + x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +double sinh(double x) { return __ocml_sinh_f64(x); } +__DEVICE__ +inline +double sinpi(double x) { return __ocml_sinpi_f64(x); } +__DEVICE__ +inline +double sqrt(double x) { return __ocml_sqrt_f64(x); } +__DEVICE__ +inline +double tan(double x) { return __ocml_tan_f64(x); } +__DEVICE__ +inline +double tanh(double x) { return __ocml_tanh_f64(x); } +__DEVICE__ +inline +double tgamma(double x) { return __ocml_tgamma_f64(x); } +__DEVICE__ +inline +double trunc(double x) { return __ocml_trunc_f64(x); } +__DEVICE__ +inline +double y0(double x) { return __ocml_y0_f64(x); } +__DEVICE__ +inline +double y1(double x) { return __ocml_y1_f64(x); } +__DEVICE__ +inline +double yn(int n, double x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. 
+ // Seed with the double-precision Bessel functions of the second kind. + // The j0f/j1f calls used here before were both the wrong kind (first + // instead of second) and the wrong precision (float instead of double). + if (n == 0) return y0(x); + if (n == 1) return y1(x); + + double x0 = y0(x); + double x1 = y1(x); + // Upward recurrence Y(i+1) = (2 * i / x) * Y(i) - Y(i-1). + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dadd_rn(double x, double y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } +__DEVICE__ +inline +double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __ddiv_rn(double x, double y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } +__DEVICE__ +inline +double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dmul_rn(double x, double y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +__DEVICE__ +inline +double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +__DEVICE__ +inline +double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } +#endif +__DEVICE__ +inline +double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } +__DEVICE__ +inline +double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } +__DEVICE__ +inline +double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } 
+#endif +__DEVICE__ +inline +double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } +__DEVICE__ +inline +double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } +__DEVICE__ +inline +double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dsub_rn(double x, double y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +__DEVICE__ +inline +double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } +__DEVICE__ +inline +double __fma_rd(double x, double y, double z) +{ + return __ocml_fma_rtn_f64(x, y, z); +} +#endif +__DEVICE__ +inline +double __fma_rn(double x, double y, double z) +{ + return __ocml_fma_f64(x, y, z); +} +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __fma_ru(double x, double y, double z) +{ + return __ocml_fma_rtp_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rz(double x, double y, double z) +{ + return __ocml_fma_rtz_f64(x, y, z); +} +#endif +// END INTRINSICS +// END DOUBLE + +// BEGIN INTEGER +__DEVICE__ +inline +int abs(int x) +{ + int sgn = x >> (sizeof(int) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} +__DEVICE__ +inline +long labs(long x) +{ + long sgn = x >> (sizeof(long) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} +__DEVICE__ +inline +long long llabs(long long x) +{ + long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} + +#if defined(__cplusplus) + __DEVICE__ + inline + long abs(long x) { return labs(x); } + __DEVICE__ + inline + long long abs(long long x) { return llabs(x); } +#endif +// END INTEGER + +__DEVICE__ +inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) { + return __ocml_fma_f16(x, y, z); +} + +__DEVICE__ +inline float fma(float x, float y, float z) { + 
return fmaf(x, y, z); +} + +#pragma push_macro("__DEF_FLOAT_FUN") +#pragma push_macro("__DEF_FLOAT_FUN2") +#pragma push_macro("__DEF_FLOAT_FUN2I") +#pragma push_macro("__HIP_OVERLOAD") +#pragma push_macro("__HIP_OVERLOAD2") + +// NOTE(review): the names pushed above do not include __HIP_OVERLOAD1, +// __DEF_FUN1 or __DEF_FUNI, which are the macros actually defined below; +// confirm whether those were meant to be saved and restored as well. + +// __hip_enable_if::type is a type function which returns __T if __B is true. +// The primary template has no member `type`, so using it with a false +// condition removes the overload from consideration. +template +struct __hip_enable_if {}; + +template struct __hip_enable_if { + typedef __T type; +}; + +// __HIP_OVERLOAD1 is used to resolve function calls with an integer argument, +// avoiding a compilation error due to ambiguity, e.g. floor(5) is resolved +// with floor(double). The generated overload casts its argument to double and +// forwards to ::__fn, returning __retty. +#define __HIP_OVERLOAD1(__retty, __fn) \ + template \ + __DEVICE__ \ + typename __hip_enable_if::is_integer, \ + __retty>::type \ + __fn(__T __x) { \ + return ::__fn((double)__x); \ + } + +// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double +// or integer arguments, avoiding a compilation error due to ambiguity, e.g. +// max(5.0f, 6.0) is resolved with max(double, double). Both arguments are +// cast to double before forwarding to __fn. +#define __HIP_OVERLOAD2(__retty, __fn) \ + template \ + __DEVICE__ typename __hip_enable_if< \ + std::numeric_limits<__T1>::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ + return __fn((double)__x, (double)__y); \ + } + +// Defines the float overload of a cmath function: it takes a float, returns a +// float, and forwards to the f-suffixed variant (func##f). It also emits the +// integer-argument overload through __HIP_OVERLOAD1, which returns retty. +#define __DEF_FUN1(retty, func) \ +__DEVICE__ \ +inline \ +float func(float x) \ +{ \ + return func##f(x); \ +} \ +__HIP_OVERLOAD1(retty, func) + +// Same as __DEF_FUN1, except that the float overload returns retty instead of +// float (used for functions whose result is an integer type). +#define __DEF_FUNI(retty, func) \ +__DEVICE__ \ +inline \ +retty func(float x) \ +{ \ + return func##f(x); \ +} \ +__HIP_OVERLOAD1(retty, func) + +// define cmath functions with two float arguments.
+#define __DEF_FUN2(retty, func) \ +__DEVICE__ \ +inline \ +float func(float x, float y) \ +{ \ + return func##f(x, y); \ +} \ +__HIP_OVERLOAD2(retty, func) + +__DEF_FUN1(double, acos) +__DEF_FUN1(double, acosh) +__DEF_FUN1(double, asin) +__DEF_FUN1(double, asinh) +__DEF_FUN1(double, atan) +__DEF_FUN2(double, atan2); +__DEF_FUN1(double, atanh) +__DEF_FUN1(double, cbrt) +__DEF_FUN1(double, ceil) +__DEF_FUN2(double, copysign); +__DEF_FUN1(double, cos) +__DEF_FUN1(double, cosh) +__DEF_FUN1(double, erf) +__DEF_FUN1(double, erfc) +__DEF_FUN1(double, exp) +__DEF_FUN1(double, exp2) +__DEF_FUN1(double, expm1) +__DEF_FUN1(double, fabs) +__DEF_FUN2(double, fdim); +__DEF_FUN1(double, floor) +__DEF_FUN2(double, fmax); +__DEF_FUN2(double, fmin); +__DEF_FUN2(double, fmod); +//__HIP_OVERLOAD1(int, fpclassify) +__DEF_FUN2(double, hypot); +__DEF_FUNI(int, ilogb) +__HIP_OVERLOAD1(bool, isfinite) +__HIP_OVERLOAD2(bool, isgreater); +__HIP_OVERLOAD2(bool, isgreaterequal); +__HIP_OVERLOAD1(bool, isinf); +__HIP_OVERLOAD2(bool, isless); +__HIP_OVERLOAD2(bool, islessequal); +__HIP_OVERLOAD2(bool, islessgreater); +__HIP_OVERLOAD1(bool, isnan); +//__HIP_OVERLOAD1(bool, isnormal) +__HIP_OVERLOAD2(bool, isunordered); +__DEF_FUN1(double, lgamma) +__DEF_FUN1(double, log) +__DEF_FUN1(double, log10) +__DEF_FUN1(double, log1p) +__DEF_FUN1(double, log2) +__DEF_FUN1(double, logb) +__DEF_FUNI(long long, llrint) +__DEF_FUNI(long long, llround) +__DEF_FUNI(long, lrint) +__DEF_FUNI(long, lround) +__DEF_FUN1(double, nearbyint); +__DEF_FUN2(double, nextafter); +__DEF_FUN2(double, pow); +__DEF_FUN2(double, remainder); +__DEF_FUN1(double, rint); +__DEF_FUN1(double, round); +__HIP_OVERLOAD1(bool, signbit) +__DEF_FUN1(double, sin) +__DEF_FUN1(double, sinh) +__DEF_FUN1(double, sqrt) +__DEF_FUN1(double, tan) +__DEF_FUN1(double, tanh) +__DEF_FUN1(double, tgamma) +__DEF_FUN1(double, trunc); + +// define cmath functions with a float and an integer argument. 
+#define __DEF_FLOAT_FUN2I(func) \ +__DEVICE__ \ +inline \ +float func(float x, int y) \ +{ \ + return func##f(x, y); \ +} +__DEF_FLOAT_FUN2I(scalbn) + +#if __HCC__ +template +__DEVICE__ inline static T min(T arg1, T arg2) { + return (arg1 < arg2) ? arg1 : arg2; +} + +__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) { + return min(arg1, (uint32_t) arg2); +} +/*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) { + return min((uint32_t) arg1, arg2); +} + +__DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) { + return min(arg1, (uint64_t) arg2); +} +__DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) { + return min((uint64_t) arg1, arg2); +} + +__DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) { + return min(arg1, (unsigned long long) arg2); +} +__DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) { + return min((unsigned long long) arg1, arg2); +}*/ + +template +__DEVICE__ inline static T max(T arg1, T arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) { + return max(arg1, (uint32_t) arg2); +} +__DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) { + return max((uint32_t) arg1, arg2); +} + +/*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) { + return max(arg1, (uint64_t) arg2); +} +__DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) { + return max((uint64_t) arg1, arg2); +} + +__DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) { + return max(arg1, (unsigned long long) arg2); +} +__DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) { + return max((unsigned long long) arg1, arg2); +}*/ +#else +__DEVICE__ inline int min(int arg1, int arg2) { + return (arg1 < arg2) ? 
arg1 : arg2; +} +__DEVICE__ inline int max(int arg1, int arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +__DEVICE__ +inline +float max(float x, float y) { + return fmaxf(x, y); +} + +__DEVICE__ +inline +double max(double x, double y) { + return fmax(x, y); +} + +__DEVICE__ +inline +float min(float x, float y) { + return fminf(x, y); +} + +__DEVICE__ +inline +double min(double x, double y) { + return fmin(x, y); +} + +__HIP_OVERLOAD2(double, max) +__HIP_OVERLOAD2(double, min) + +#endif + +__host__ inline static int min(int arg1, int arg2) { + return std::min(arg1, arg2); +} + +__host__ inline static int max(int arg1, int arg2) { + return std::max(arg1, arg2); +} + + +#pragma pop_macro("__DEF_FLOAT_FUN") +#pragma pop_macro("__DEF_FLOAT_FUN2") +#pragma pop_macro("__DEF_FLOAT_FUN2I") +#pragma pop_macro("__HIP_OVERLOAD") +#pragma pop_macro("__HIP_OVERLOAD2") +#pragma pop_macro("__DEVICE__") +#pragma pop_macro("__RETURN_TYPE") + +// For backward compatibility. +// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros +// defined after including math_functions.h. +#include diff --git a/src/utils/amd_hip/hip/hcc_detail/math_fwd.h b/src/utils/amd_hip/hip/hcc_detail/math_fwd.h new file mode 100644 index 000000000..c25b5e90b --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/math_fwd.h @@ -0,0 +1,706 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "host_defines.h" + +#if defined(__cplusplus) + extern "C" { +#endif + +// DOT FUNCTIONS +#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ +__device__ +__attribute__((const)) +int __ockl_sdot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot8(int, int, int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool); +#endif + +// BEGIN FLOAT +__device__ +__attribute__((const)) +float __ocml_acos_f32(float); +__device__ +__attribute__((pure)) +float __ocml_acosh_f32(float); +__device__ +__attribute__((const)) +float __ocml_asin_f32(float); +__device__ +__attribute__((pure)) +float __ocml_asinh_f32(float); +__device__ +__attribute__((const)) +float __ocml_atan2_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_atan_f32(float); +__device__ +__attribute__((pure)) +float __ocml_atanh_f32(float); +__device__ +__attribute__((pure)) +float __ocml_cbrt_f32(float); +__device__ 
+__attribute__((const)) +float __ocml_ceil_f32(float); +__device__ +__attribute__((const)) +__device__ +float __ocml_copysign_f32(float, float); +__device__ +float __ocml_cos_f32(float); +__device__ +float __ocml_native_cos_f32(float); +__device__ +__attribute__((pure)) +__device__ +float __ocml_cosh_f32(float); +__device__ +float __ocml_cospi_f32(float); +__device__ +float __ocml_i0_f32(float); +__device__ +float __ocml_i1_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfc_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfcinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfcx_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erf_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_exp10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp2_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_exp_f32(float); +__device__ +__attribute__((pure)) +float __ocml_expm1_f32(float); +__device__ +__attribute__((const)) +float __ocml_fabs_f32(float); +__device__ +__attribute__((const)) +float __ocml_fdim_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_floor_f32(float); +__device__ +__attribute__((const)) +float __ocml_fma_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fmax_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_fmin_f32(float, float); +__device__ +__attribute__((const)) +__device__ +float __ocml_fmod_f32(float, float); +__device__ +float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +float __ocml_hypot_f32(float, float); +__device__ +__attribute__((const)) +int __ocml_ilogb_f32(float); +__device__ +__attribute__((const)) +int 
__ocml_isfinite_f32(float); +__device__ +__attribute__((const)) +int __ocml_isinf_f32(float); +__device__ +__attribute__((const)) +int __ocml_isnan_f32(float); +__device__ +float __ocml_j0_f32(float); +__device__ +float __ocml_j1_f32(float); +__device__ +__attribute__((const)) +float __ocml_ldexp_f32(float, int); +__device__ +float __ocml_lgamma_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_log10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log1p_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log2_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_log2_f32(float); +__device__ +__attribute__((const)) +float __ocml_logb_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_log_f32(float); +__device__ +float __ocml_modf_f32(float, __attribute__((address_space(5))) float*); +__device__ +__attribute__((const)) +float __ocml_nearbyint_f32(float); +__device__ +__attribute__((const)) +float __ocml_nextafter_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_len3_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_len4_f32(float, float, float, float); +__device__ +__attribute__((pure)) +float __ocml_ncdf_f32(float); +__device__ +__attribute__((pure)) +float __ocml_ncdfinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_pow_f32(float, float); +__device__ +__attribute__((pure)) +float __ocml_rcbrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_remainder_f32(float, float); +__device__ +float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +float __ocml_rhypot_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_rint_f32(float); +__device__ +__attribute__((const)) +float __ocml_rlen3_f32(float, float, float); +__device__ 
+__attribute__((const)) +float __ocml_rlen4_f32(float, float, float, float); +__device__ +__attribute__((const)) +float __ocml_round_f32(float); +__device__ +__attribute__((pure)) +float __ocml_rsqrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_scalb_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_scalbn_f32(float, int); +__device__ +__attribute__((const)) +int __ocml_signbit_f32(float); +__device__ +float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*); +__device__ +float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*); +__device__ +float __ocml_sin_f32(float); +__device__ +float __ocml_native_sin_f32(float); +__device__ +__attribute__((pure)) +float __ocml_sinh_f32(float); +__device__ +float __ocml_sinpi_f32(float); +__device__ +__attribute__((const)) +float __ocml_sqrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_native_sqrt_f32(float); +__device__ +float __ocml_tan_f32(float); +__device__ +__attribute__((pure)) +float __ocml_tanh_f32(float); +__device__ +float __ocml_tgamma_f32(float); +__device__ +__attribute__((const)) +float __ocml_trunc_f32(float); +__device__ +float __ocml_y0_f32(float); +__device__ +float __ocml_y1_f32(float); + +// BEGIN INTRINSICS +__device__ +__attribute__((const)) +float __ocml_add_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rte_f32(float, float); +__device__ 
+__attribute__((const)) +float __ocml_mul_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rte_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtn_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtp_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtz_f32(float, float, float); + +__device__ +__attribute__((const)) +float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32"); +__device__ +__attribute__((const)) +float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32"); +__device__ +__attribute__((const)) +float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32"); +__device__ +__attribute__((const)) +float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32"); +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__device__ +__attribute__((const)) +double __ocml_acos_f64(double); +__device__ +__attribute__((pure)) +double __ocml_acosh_f64(double); +__device__ +__attribute__((const)) +double __ocml_asin_f64(double); +__device__ +__attribute__((pure)) +double __ocml_asinh_f64(double); +__device__ +__attribute__((const)) +double __ocml_atan2_f64(double, double); 
+__device__ +__attribute__((const)) +double __ocml_atan_f64(double); +__device__ +__attribute__((pure)) +double __ocml_atanh_f64(double); +__device__ +__attribute__((pure)) +double __ocml_cbrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_ceil_f64(double); +__device__ +__attribute__((const)) +double __ocml_copysign_f64(double, double); +__device__ +double __ocml_cos_f64(double); +__device__ +__attribute__((pure)) +double __ocml_cosh_f64(double); +__device__ +double __ocml_cospi_f64(double); +__device__ +double __ocml_i0_f64(double); +__device__ +double __ocml_i1_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfc_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfcinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfcx_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erf_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp10_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp2_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp_f64(double); +__device__ +__attribute__((pure)) +double __ocml_expm1_f64(double); +__device__ +__attribute__((const)) +double __ocml_fabs_f64(double); +__device__ +__attribute__((const)) +double __ocml_fdim_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_floor_f64(double); +__device__ +__attribute__((const)) +double __ocml_fma_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fmax_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_fmin_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_fmod_f64(double, double); +__device__ +double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +double __ocml_hypot_f64(double, double); +__device__ +__attribute__((const)) +int __ocml_ilogb_f64(double); +__device__ 
+__attribute__((const)) +int __ocml_isfinite_f64(double); +__device__ +__attribute__((const)) +int __ocml_isinf_f64(double); +__device__ +__attribute__((const)) +int __ocml_isnan_f64(double); +__device__ +double __ocml_j0_f64(double); +__device__ +double __ocml_j1_f64(double); +__device__ +__attribute__((const)) +double __ocml_ldexp_f64(double, int); +__device__ +double __ocml_lgamma_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log10_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log1p_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log2_f64(double); +__device__ +__attribute__((const)) +double __ocml_logb_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log_f64(double); +__device__ +double __ocml_modf_f64(double, __attribute__((address_space(5))) double*); +__device__ +__attribute__((const)) +double __ocml_nearbyint_f64(double); +__device__ +__attribute__((const)) +double __ocml_nextafter_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_len3_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_len4_f64(double, double, double, double); +__device__ +__attribute__((pure)) +double __ocml_ncdf_f64(double); +__device__ +__attribute__((pure)) +double __ocml_ncdfinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_pow_f64(double, double); +__device__ +__attribute__((pure)) +double __ocml_rcbrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_remainder_f64(double, double); +__device__ +double __ocml_remquo_f64( + double, double, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +double __ocml_rhypot_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_rint_f64(double); +__device__ +__attribute__((const)) +double __ocml_rlen3_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_rlen4_f64(double, double, double, double); +__device__ +__attribute__((const)) +double 
__ocml_round_f64(double); +__device__ +__attribute__((pure)) +double __ocml_rsqrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_scalb_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_scalbn_f64(double, int); +__device__ +__attribute__((const)) +int __ocml_signbit_f64(double); +__device__ +double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*); +__device__ +double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*); +__device__ +double __ocml_sin_f64(double); +__device__ +__attribute__((pure)) +double __ocml_sinh_f64(double); +__device__ +double __ocml_sinpi_f64(double); +__device__ +__attribute__((const)) +double __ocml_sqrt_f64(double); +__device__ +double __ocml_tan_f64(double); +__device__ +__attribute__((pure)) +double __ocml_tanh_f64(double); +__device__ +double __ocml_tgamma_f64(double); +__device__ +__attribute__((const)) +double __ocml_trunc_f64(double); +__device__ +double __ocml_y0_f64(double); +__device__ +double __ocml_y1_f64(double); + +// BEGIN INTRINSICS +__device__ +__attribute__((const)) +double __ocml_add_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rtp_f64(double, double); +__device__ +__attribute__((const)) +double 
__ocml_mul_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rte_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rtn_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rtp_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rtz_f64(double, double, double); + +__device__ +__attribute__((const)) +double __llvm_amdgcn_rcp_f64(double) __asm("llvm.amdgcn.rcp.f64"); +__device__ +__attribute__((const)) +double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64"); +// END INTRINSICS +// END DOUBLE + +#if defined(__cplusplus) + } // extern "C" +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/program_state.hpp b/src/utils/amd_hip/hip/hcc_detail/program_state.hpp new file mode 100644 index 000000000..da13c7c3d --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/program_state.hpp @@ -0,0 +1,108 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct ihipModuleSymbol_t; +using hipFunction_t = ihipModuleSymbol_t*; + +namespace std { +template <> +struct hash { + size_t operator()(hsa_agent_t x) const { return hash{}(x.handle); } +}; +} // namespace std + +inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; } + +namespace hip_impl { +class Kernel_descriptor { + std::uint64_t kernel_object_{}; + amd_kernel_code_t const* kernel_header_{nullptr}; + std::string name_{}; +public: + Kernel_descriptor() = default; + Kernel_descriptor(std::uint64_t kernel_object, const std::string& name) + : kernel_object_{kernel_object}, name_{name} + { + bool supported{false}; + std::uint16_t min_v{UINT16_MAX}; + auto r = hsa_system_major_extension_supported( + HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported); + + if (r != HSA_STATUS_SUCCESS || !supported) return; + + hsa_ven_amd_loader_1_01_pfn_t tbl{}; + + r = hsa_system_get_major_extension_table( + HSA_EXTENSION_AMD_LOADER, + 1, + sizeof(tbl), + reinterpret_cast(&tbl)); + + if (r != HSA_STATUS_SUCCESS) return; + if (!tbl.hsa_ven_amd_loader_query_host_address) return; + + r = tbl.hsa_ven_amd_loader_query_host_address( + reinterpret_cast(kernel_object_), + reinterpret_cast(&kernel_header_)); + + if (r != HSA_STATUS_SUCCESS) return; + } + Kernel_descriptor(const Kernel_descriptor&) = default; + Kernel_descriptor(Kernel_descriptor&&) = default; + ~Kernel_descriptor() = default; + + Kernel_descriptor& operator=(const Kernel_descriptor&) = default; + Kernel_descriptor& operator=(Kernel_descriptor&&) = default; + + operator hipFunction_t() const { // TODO: this is awful and only meant for illustration. 
+ return reinterpret_cast(const_cast(this)); + } +}; + +const std::unordered_map>& executables( + bool rebuild = false); +const std::unordered_map>>& +functions(bool rebuild = false); +const std::unordered_map& function_names(bool rebuild = false); +std::unordered_map& globals(bool rebuild = false); +const std::unordered_map< + std::string, std::vector>>& + kernargs(bool rebuild = false); + +hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, + hsa_agent_t agent); +} // Namespace hip_impl. diff --git a/src/utils/amd_hip/hip/hcc_detail/surface_functions.h b/src/utils/amd_hip/hip/hcc_detail/surface_functions.h new file mode 100644 index 000000000..b9cab1f46 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/surface_functions.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H + +#include + +#define __SURFACE_FUNCTIONS_DECL__ static inline __device__ +template +__SURFACE_FUNCTIONS_DECL__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, + int boundaryMode = hipBoundaryModeZero) { + hipArray* arrayPtr = (hipArray*)surfObj; + size_t width = arrayPtr->width; + size_t height = arrayPtr->height; + int32_t xOffset = x / sizeof(T); + T* dataPtr = (T*)arrayPtr->data; + if ((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0)) { + if (boundaryMode == hipBoundaryModeZero) { + *data = 0; + } + } else { + *data = *(dataPtr + y * width + xOffset); + } +} + +template +__SURFACE_FUNCTIONS_DECL__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, + int boundaryMode = hipBoundaryModeZero) { + hipArray* arrayPtr = (hipArray*)surfObj; + size_t width = arrayPtr->width; + size_t height = arrayPtr->height; + int32_t xOffset = x / sizeof(T); + T* dataPtr = (T*)arrayPtr->data; + if (!((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0))) { + *(dataPtr + y * width + xOffset) = data; + } +} + +#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/texture_functions.h b/src/utils/amd_hip/hip/hcc_detail/texture_functions.h new file mode 100644 index 000000000..1f22e0117 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/texture_functions.h @@ -0,0 +1,11102 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H + +#include +#include + +#pragma push_macro("TYPEDEF_VECTOR_VALUE_TYPE") +#define TYPEDEF_VECTOR_VALUE_TYPE(SCALAR_TYPE) \ +typedef SCALAR_TYPE __hip_##SCALAR_TYPE##2_vector_value_type __attribute__((ext_vector_type(2))); \ +typedef SCALAR_TYPE __hip_##SCALAR_TYPE##3_vector_value_type __attribute__((ext_vector_type(3))); \ +typedef SCALAR_TYPE __hip_##SCALAR_TYPE##4_vector_value_type __attribute__((ext_vector_type(4))); \ +typedef SCALAR_TYPE __hip_##SCALAR_TYPE##8_vector_value_type __attribute__((ext_vector_type(8))); \ +typedef SCALAR_TYPE __hip_##SCALAR_TYPE##16_vector_value_type __attribute__((ext_vector_type(16))); + +TYPEDEF_VECTOR_VALUE_TYPE(float); +TYPEDEF_VECTOR_VALUE_TYPE(int); +TYPEDEF_VECTOR_VALUE_TYPE(uint); + +#undef TYPEDEF_VECTOR_VALUE_TYPE +#pragma pop_macro("TYPEDEF_VECTOR_VALUE_TYPE") + +union TData { + __hip_float4_vector_value_type f; + __hip_int4_vector_value_type i; + __hip_uint4_vector_value_type u; +}; + +#define __TEXTURE_FUNCTIONS_DECL__ static inline __device__ + + +#if (__hcc_workweek__ >= 18114) || __clang__ +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) +#else +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(2))) +#endif + +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ + TData texel; +#define TEXTURE_REF_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)texRef.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ + TData texel; +#define TEXTURE_SET_FLOAT *retVal = texel.f.x; + +#define TEXTURE_SET_SIGNED *retVal = texel.i.x; + +#define TEXTURE_SET_UNSIGNED *retVal = texel.u.x; + +#define TEXTURE_SET_FLOAT_X retVal->x = texel.f.x; 
+ +#define TEXTURE_SET_SIGNED_X retVal->x = texel.i.x; + +#define TEXTURE_SET_UNSIGNED_X retVal->x = texel.u.x; + +#define TEXTURE_SET_FLOAT_XY \ + retVal->x = texel.f.x; \ + retVal->y = texel.f.y; + +#define TEXTURE_SET_SIGNED_XY \ + retVal->x = texel.i.x; \ + retVal->y = texel.i.y; + +#define TEXTURE_SET_UNSIGNED_XY \ + retVal->x = texel.u.x; \ + retVal->y = texel.u.y; + +#define TEXTURE_SET_FLOAT_XYZW \ + retVal->x = texel.f.x; \ + retVal->y = texel.f.y; \ + retVal->z = texel.f.z; \ + retVal->w = texel.f.w; + +#define TEXTURE_SET_SIGNED_XYZW \ + retVal->x = texel.i.x; \ + retVal->y = texel.i.y; \ + retVal->z = texel.i.z; \ + retVal->w = texel.i.w; + +#define TEXTURE_SET_UNSIGNED_XYZW \ + retVal->x = texel.u.x; \ + retVal->y = texel.u.y; \ + retVal->z = texel.u.z; \ + retVal->w = texel.u.w; + +#define TEXTURE_RETURN_CHAR return texel.i.x; + +#define TEXTURE_RETURN_UCHAR return texel.u.x; + +#define TEXTURE_RETURN_SHORT return texel.i.x; + +#define TEXTURE_RETURN_USHORT return texel.u.x; + +#define TEXTURE_RETURN_INT return texel.i.x; + +#define TEXTURE_RETURN_UINT return texel.u.x; + +#define TEXTURE_RETURN_FLOAT return texel.f.x; + +#define TEXTURE_RETURN_SIGNED return texel.i.x; + +#define TEXTURE_RETURN_UNSIGNED return texel.u.x; + +#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x); + +#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x); + +#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x); + +#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x); + +#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x); + +#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x); + +#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x); + +#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y); + +#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, 
texel.u.y); + +#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y); + +#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y); + +#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w); + +extern "C" { + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_1D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + float c); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_1Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_2D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c); + + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_2Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c); + +__device__ +float __ockl_image_sample_2Dad( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c); + +__device__ +float __ockl_image_sample_2Dd( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + 
__hip_float2_vector_value_type c); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_3D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_grad_1D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + float c, float dx, float dy); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_grad_1Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, float dx, float dy); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_grad_2D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_grad_2Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); + +__device__ +float __ockl_image_sample_grad_2Dad( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); + +__device__ +float __ockl_image_sample_grad_2Dd( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_grad_3D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, __hip_float4_vector_value_type dx, __hip_float4_vector_value_type dy); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_lod_1D( + unsigned int 
ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + float c, float l); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_lod_1Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, float l); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_lod_2D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, float l); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_lod_2Da( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, float l); + +__device__ +float __ockl_image_sample_lod_2Dad( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, float l); + +__device__ +float __ockl_image_sample_lod_2Dd( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float2_vector_value_type c, float l); + +__device__ +__hip_float4_vector_value_type __ockl_image_sample_lod_3D( + unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, + __hip_float4_vector_value_type c, float l); +} + +//////////////////////////////////////////////////////////// +// Texture object APIs +//////////////////////////////////////////////////////////// + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char1* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char2* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + 
TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char4* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned char* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar1* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar2* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar4* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short1* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short2* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short4* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned short* retVal, hipTextureObject_t textureObject, 
+ int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; /* unsigned short* result: use the unsigned setter, as the other unsigned short overloads (tex1D, tex1DLod, tex1DGrad, tex2D) do */ +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort1* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort2* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort4* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int1* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int2* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int4* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned int* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint1* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex1Dfetch(uint2* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint4* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float* retVal, hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float1* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float2* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float4* retVal, hipTextureObject_t textureObject, + int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1Dfetch(hipTextureObject_t textureObject, int x) { + T ret; + tex1Dfetch(&ret, textureObject, x); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + 
TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned char* retVal, hipTextureObject_t textureObject, + float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned short* retVal, hipTextureObject_t textureObject, + float x) { + TEXTURE_PARAMETERS_INIT; + 
texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned int* retVal, hipTextureObject_t textureObject, + float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint2* retVal, hipTextureObject_t textureObject, float x) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float1* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float2* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float4* retVal, hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XYZW; +} +template +__TEXTURE_FUNCTIONS_DECL__ T tex1D(hipTextureObject_t textureObject, float x) { + T ret; + tex1D(&ret, textureObject, x); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; 
+} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short4* retVal, hipTextureObject_t textureObject, float x, + float level) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex1DLod(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float1* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float2* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float4* retVal, hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) { + T ret; + tex1DLod(&ret, textureObject, x, level); + return ret; +} + 
+//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort1* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort2* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort4* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int* retVal, 
hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float1* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float2* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float4* retVal, hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dx, + float dy) { /* value-returning wrapper: forwards to the tex1DGrad(T*, ...) overload selected by T */ + T ret; + tex1DGrad(&ret, textureObject, x, dx, dy); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XYZW; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint1* retVal, hipTextureObject_t 
textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float1* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float2* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float4* retVal, hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> /* by-value form: T picks the pointer-output tex2D overload; its result is returned */ +__TEXTURE_FUNCTIONS_DECL__ T tex2D(hipTextureObject_t textureObject, float x, float y) { + T ret; + tex2D(&ret, textureObject, x, y); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED; 
+} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int* retVal, hipTextureObject_t textureObject, float x, + 
float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float* 
retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float1* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float2* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float4* retVal, hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> /* by-value form: T picks the pointer-output tex2DLod overload; its result is returned */ +__TEXTURE_FUNCTIONS_DECL__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, + float level) { + T ret; + tex2DLod(&ret, textureObject, x, y, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex3D(char4* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar4* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short4* 
retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort4* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int4* retVal, 
hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint4* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float1* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float2* retVal, hipTextureObject_t textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float4* retVal, hipTextureObject_t 
textureObject, float x, + float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> /* by-value form: T picks the pointer-output tex3D overload; its result is returned */ +__TEXTURE_FUNCTIONS_DECL__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) { + T ret; + tex3D(&ret, textureObject, x, y, z); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned char* retVal, hipTextureObject_t textureObject, + float x, float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, 
float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned short* retVal, hipTextureObject_t textureObject, + float x, float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + 
TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex3DLod(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint4* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float1* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float2* retVal, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float4* retVal, hipTextureObject_t textureObject, float 
x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> /* by-value form: T picks the pointer-output tex3DLod overload; its result is returned */ +__TEXTURE_FUNCTIONS_DECL__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, + float level) { + T ret; + tex3DLod(&ret, textureObject, x, y, z, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned char* retVal, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar2* retVal, hipTextureObject_t textureObject, + 
float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned short* retVal, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int* retVal, hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned int* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float1* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float2* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_FLOAT_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float4* retVal, hipTextureObject_t textureObject, + float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_SET_FLOAT_XYZW; +} + +template <class T> /* by-value form: T picks the pointer-output tex1DLayered overload; its result is returned */ +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) { + T ret; + tex1DLayered(&ret, textureObject, x, layer); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned char* retVal, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + 
TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned short* retVal, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex1DLayeredLod(int* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned int* retVal, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint4* retVal, hipTextureObject_t textureObject, + float x, int layer, float 
level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float1* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float2* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float4* retVal, hipTextureObject_t textureObject, + float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, + float level) { + T ret; + tex1DLayeredLod(&ret, textureObject, x, layer, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, 
float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned char* retVal, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, 
layer).data, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned short* retVal, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, 
dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned int* retVal, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + 
TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float1* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float2* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float4* retVal, hipTextureObject_t textureObject, + float x, int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, + float dx, float dy) { + T ret; + tex1DLayeredGrad(&ret, textureObject, x, layer, dx, dy); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned char* retVal, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned short* retVal, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int* retVal, hipTextureObject_t textureObject, float x, + float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned int* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + 
texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, + int layer) { + T ret; + tex2DLayered(&ret, textureObject, x, y, layer); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_X; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned char* retVal, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, 
layer, 0.0f).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned short* retVal, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned int* retVal, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float1* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float2* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float4* retVal, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + T ret; + tex2DLayeredLod(&ret, textureObject, x, y, layer, level); + return ret; +} + +//////////////////////////////////////////////////////////// +// Texture Reference APIs +//////////////////////////////////////////////////////////// +template 
+__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture texRef, + int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture texRef, + int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture texRef, + int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ 
uint1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture texRef, int x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture texRef, + hipTextureObject_t textureObject, int x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 
tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture texRef, + float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture texRef, + float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + 
TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ 
ushort4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture texRef, + hipTextureObject_t 
textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} +////// +template +__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture texRef, float x) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture texRef, + hipTextureObject_t textureObject, float x) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_X; +} +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture texRef, + float x, float level) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture texRef, + float x, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture texRef, float x, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture texRef, + float x, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XY; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture texRef, float x, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_X; +} +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, 
level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture texRef, + hipTextureObject_t textureObject, float x, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + 
+//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture texRef, + float x, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + 
texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture texRef, + float x, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture texRef, float x, float dx, + float dy) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture texRef, + float x, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture texRef, float x, float dx, + float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture texRef, float x, + float dx, float 
dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture texRef, float x, + float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 
tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + 
TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float dx, + float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; 
+ texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture texRef, + float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture texRef, + float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture texRef, float x, + float y) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_XYZW; +} + + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + 
TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture texRef, + hipTextureObject_t textureObject, float x, + float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture texRef, + 
hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + 
TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture texRef, float x, float y) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture texRef, + hipTextureObject_t textureObject, float x, float y) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture texRef, + float x, float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture texRef, + float x, float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture texRef, float x, + float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture texRef, float x, + 
float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture texRef, float x, + float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture texRef, + float x, float y, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture texRef, float x, float y, + float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float 
x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT; +} 
+ +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture texRef, + hipTextureObject_t textureObject, 
float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture texRef, 
float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture texRef, + float x, float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + 
TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture texRef, + float x, float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture texRef, float x, + float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture texRef, float x, + float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture texRef, float x, + float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture texRef, + float x, float y, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture texRef, float x, float y, + 
float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture texRef, float x, float y, + float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, 
s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + 
float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture texRef, + float x, float y, float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ 
uchar1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture texRef, + float x, float y, float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, 
z, 0.0f).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture texRef, float x, + float y, float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture texRef, float x, float y, + float z) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture texRef, float x, float y, + float z) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_X; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + 
texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture texRef, + 
hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture texRef, + float x, float y, float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture texRef, + float x, float y, 
float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture texRef, float x, float y, + float z, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_XYZW; +} + 
+//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, float z, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, + level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char 
tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture texRef, + float x, float y, float z, float4 dx, + float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_X; 
+} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, 
dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture texRef, + float x, float y, float z, float4 dx, + float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture texRef, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture texRef, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture texRef, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 
0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture texRef, + float x, float y, float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture texRef, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, 
float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + 
float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, 
float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + float z, float4 dx, float4 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, + float4(dx.x, 
dx.y, dx.z, dx.w).data, + float4(dy.x, dy.y, dy.z, dy.w).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture texRef, + float x, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered( + texture texRef, float x, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, 
layer).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture texRef, + float x, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture texRef, float x, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture texRef, + hipTextureObject_t 
textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered( + texture texRef, hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, 
float2(x, layer).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture texRef, 
float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod( + texture texRef, float x, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod( + texture texRef, float x, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, 
layer).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture texRef, + float x, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture texRef, float x, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, 
level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod( + texture texRef, hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, 
float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod( + texture texRef, hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, int layer, + float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad( + texture texRef, float x, int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad( + texture texRef, float x, int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, 
float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad( + texture texRef, float x, int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + 
TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + 
TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture texRef, float x, + int layer, float dx, float dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + int layer, float dx, float dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture texRef, float x, float y, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_X; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture texRef, + float x, float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 
tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered( + texture texRef, float x, float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered( + texture texRef, hipTextureObject_t textureObject, float x, + float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture texRef, float x, float y, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture texRef, + 
hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture texRef, float x, float y, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture texRef, float x, float y, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture texRef, float x, float y, + int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture texRef, + float x, float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT; 
+} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + 
texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture texRef, float x, + float y, int layer) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + 
+//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, 
y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod( + texture texRef, float x, float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod( + texture texRef, hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float 
level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod( + texture texRef, float x, float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod( + texture texRef, hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture 
texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture texRef, float x, float y, + int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture texRef, + float x, float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 
0.0f).data, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture texRef, float x, + float y, int layer, float level) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, 
float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da( + i, s, float4(x, y, layer, 0.0f).data, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + 
__ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad( + texture texRef, float x, float y, int layer, float2 dx, + float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture texRef, 
float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + 
TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, 
float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad( + texture texRef, float x, float y, int layer, float2 dx, + float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, 
int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int 
tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + 
TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad( + texture texRef, float x, float y, int layer, float2 dx, + float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad( + texture texRef, hipTextureObject_t textureObject, float x, float y, + int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, 
float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + 
TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture texRef, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_REF_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture texRef, + hipTextureObject_t textureObject, float x, + float y, int layer, float2 dx, float2 dy) { + TEXTURE_PARAMETERS_INIT; + texel.f = + __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, + float2(dx.x, dx.y).data, + float2(dy.x, dy.y).data); + TEXTURE_RETURN_FLOAT_XYZW; +} +#endif diff --git 
a/src/utils/amd_hip/hip/hcc_detail/texture_types.h b/src/utils/amd_hip/hip/hcc_detail/texture_types.h new file mode 100644 index 000000000..bc334de24 --- /dev/null +++ b/src/utils/amd_hip/hip/hcc_detail/texture_types.h @@ -0,0 +1,107 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H + +#include + +#define hipTextureType1D 0x01 +#define hipTextureType2D 0x02 +#define hipTextureType3D 0x03 +#define hipTextureTypeCubemap 0x0C +#define hipTextureType1DLayered 0xF1 +#define hipTextureType2DLayered 0xF2 +#define hipTextureTypeCubemapLayered 0xFC + +/** + * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD + */ +#define HIP_IMAGE_OBJECT_SIZE_DWORD 12 +#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 +#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD +#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) + +/** + * An opaque value that represents a hip texture object + */ +typedef unsigned long long hipTextureObject_t; + +/** + * hip texture address modes + */ +enum hipTextureAddressMode { + hipAddressModeWrap = 0, + hipAddressModeClamp = 1, + hipAddressModeMirror = 2, + hipAddressModeBorder = 3 +}; + +/** + * hip texture filter modes + */ +enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 }; + +/** + * hip texture read modes + */ +enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 }; + +/** + * hip texture reference + */ +typedef struct textureReference { + int normalized; + enum hipTextureFilterMode filterMode; + enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + struct hipChannelFormatDesc channelDesc; + int sRGB; // Perform sRGB->linear conversion during texture read + unsigned int maxAnisotropy; // Limit to the anisotropy ratio + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; + + hipTextureObject_t textureObject; + int numChannels; + enum hipArray_Format format; +}textureReference; + +/** + * hip texture descriptor + */ +typedef struct hipTextureDesc { + enum 
hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + enum hipTextureFilterMode filterMode; + enum hipTextureReadMode readMode; + int sRGB; // Perform sRGB->linear conversion during texture read + float borderColor[4]; + int normalizedCoords; + unsigned int maxAnisotropy; + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; +}hipTextureDesc; + +#endif diff --git a/src/utils/amd_hip/hip/hip_common.h b/src/utils/amd_hip/hip/hip_common.h new file mode 100644 index 000000000..71285fc8d --- /dev/null +++ b/src/utils/amd_hip/hip/hip_common.h @@ -0,0 +1,79 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H +#define HIP_INCLUDE_HIP_HIP_COMMON_H + +// Common code included at start of every hip file. 
+// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC +// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly +#if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) +#define __HIP_PLATFORM_HCC__ +#endif //__HCC__ + +// Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC +#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) +#define __HIP_PLATFORM_NVCC__ +#ifdef __CUDACC__ +#define __HIPCC__ +#endif + +#endif //__NVCC__ + +// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path +#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ + (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) +#define __HIP_DEVICE_COMPILE__ 1 +#endif + +#if __HIP_DEVICE_COMPILE__ == 0 +// 32-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) + +// 64-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (0) + +// Warp cross-lane operations +#define __HIP_ARCH_HAS_WARP_VOTE__ (0) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (0) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// Sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// Misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (0) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hip_complex.h b/src/utils/amd_hip/hip/hip_complex.h new file mode 100644 index 000000000..fb9cad5e4 --- /dev/null +++ b/src/utils/amd_hip/hip/hip_complex.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_HIP_COMPLEX_H + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hip_fp16.h b/src/utils/amd_hip/hip/hip_fp16.h new file mode 100644 index 000000000..994ce62bd --- /dev/null +++ b/src/utils/amd_hip/hip/hip_fp16.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP16_H +#define HIP_INCLUDE_HIP_HIP_FP16_H + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include "cuda_fp16.h" +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hip_hcc.h b/src/utils/amd_hip/hip/hip_hcc.h new file mode 100644 index 000000000..c07a57fb3 --- /dev/null +++ b/src/utils/amd_hip/hip/hip_hcc.h @@ -0,0 +1,105 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_HCC_H +#define HIP_INCLUDE_HIP_HIP_HCC_H + +#ifdef __HCC__ + +#include "hip/hip_runtime_api.h" + +// Forward declarations: +namespace hc { +class accelerator; +class accelerator_view; +}; // namespace hc + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup HCC-specific features + * @warning These APIs provide access to special features of HCC compiler and are not available + *through the CUDA path. 
+ * @{ + */ + + +/** + * @brief Return hc::accelerator associated with the specified deviceId + * @return #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc); + +/** + * @brief Return hc::accelerator_view associated with the specified stream + * + * If stream is 0, the accelerator_view for the default stream is returned. + * @return #hipSuccess + */ +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av); + + +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + to kernelparams or extra + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified in work-items + * @param [in] gridDimY Y grid dimension specified in work-items + * @param [in] gridDimZ Z grid dimension specified in work-items + * @param [in] blockDimX X block dimension specified in work-items + * @param [in] blockDimY Y block dimension specified in work-items + * @param [in] blockDimZ Z block dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + default stream is used with associated synchronization rules. + * @param [in] kernelParams + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + must be in the memory layout and alignment expected by the kernel. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + the kernel launch. The event must be created before calling this API.
+ * + * @returns hipSuccess, hipErrorInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please + refer to hip_porting_driver_api.md for sample usage. + + * HIP/ROCm actually updates the start event when the associated kernel completes. + */ +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent = nullptr, + hipEvent_t stopEvent = nullptr); + +// doxygen end HCC-specific features +/** + * @} + */ +#endif // #ifdef __HCC__ +#endif // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H diff --git a/src/utils/amd_hip/hip/hip_profile.h b/src/utils/amd_hip/hip/hip_profile.h new file mode 100644 index 000000000..747483925 --- /dev/null +++ b/src/utils/amd_hip/hip/hip_profile.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H +#define HIP_INCLUDE_HIP_HIP_PROFILE_H + +#if not defined(ENABLE_HIP_PROFILE) +#define ENABLE_HIP_PROFILE 1 +#endif + +#if defined(__HIP_PLATFORM_HCC__) and (ENABLE_HIP_PROFILE == 1) +#include +#define HIP_SCOPED_MARKER(markerName, group) \ + amdtScopedMarker __scopedMarker(markerName, group, nullptr); +#define HIP_BEGIN_MARKER(markerName, group) amdtBeginMarker(markerName, group, nullptr); +#define HIP_END_MARKER() amdtEndMarker(); +#else +#define HIP_SCOPED_MARKER(markerName, group) +#define HIP_BEGIN_MARKER(markerName, group) +#define HIP_END_MARKER() +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hip_runtime.h b/src/utils/amd_hip/hip/hip_runtime.h new file mode 100644 index 000000000..937ba61ec --- /dev/null +++ b/src/utils/amd_hip/hip/hip_runtime.h @@ -0,0 +1,67 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! HIP = Heterogeneous-compute Interface for Portability +//! +//! Define an extremely thin runtime layer that allows source code to be compiled unmodified +//! through either AMD HCC or NVCC. Key features tend to be in the spirit +//! and terminology of CUDA, but with a portable path to other accelerators as well: +// +//! Both paths support rich C++ features including classes, templates, lambdas, etc. +//! Runtime API is C +//! Memory management is based on pure pointers and resembles malloc/free/copy. +// +//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch +//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ +//! features. + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appear +// on NVCC path: +#include +#include +#include +#include + +#if __cplusplus > 199711L +#include +#endif + + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + + +#include +#include + +#endif \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hip_runtime_api.h b/src/utils/amd_hip/hip/hip_runtime_api.h new file mode 100644 index 000000000..34363689e --- /dev/null +++ b/src/utils/amd_hip/hip/hip_runtime_api.h @@ -0,0 +1,342 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc.
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hip_runtime_api.h + * + * @brief Defines the API signatures for HIP runtime. + * This file can be compiled with a standard compiler. + */ + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H + + +#include // for getDeviceProp +#include + +enum { + HIP_SUCCESS = 0, + HIP_ERROR_INVALID_VALUE, + HIP_ERROR_NOT_INITIALIZED, + HIP_ERROR_LAUNCH_OUT_OF_RESOURCES +}; + +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. 
+ + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). + unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + + +//--- +// Common headers for both NVCC and HCC paths: + +/** + * hipDeviceProp + * + */ +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of constant memory region (in bytes). + int major; ///< Major compute capability.
On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors (compute units). + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< AMD GCN Arch Value. Eg: 803, 701 + int integrated; ///< APU vs dGPU +} hipDeviceProp_t; + + +/** + * Memory type (for pointer attributes) + */ +typedef enum hipMemoryType { + hipMemoryTypeHost, ///< Memory is physically located on host + hipMemoryTypeDevice, ///< Memory is physically located on device. (see deviceId for specific + ///< device) + hipMemoryTypeArray, ///< Array memory, physically located on device. 
(see deviceId for specific + ///< device) + hipMemoryTypeUnified ///< Not used currently +}hipMemoryType; + + +/** + * Pointer attributes + */ +typedef struct hipPointerAttribute_t { + enum hipMemoryType memoryType; + int device; + void* devicePointer; + void* hostPointer; + int isManaged; + unsigned allocationFlags; /* flags specified when memory was allocated*/ + /* peers? */ +} hipPointerAttribute_t; + + +// hack to get these to show up in Doxygen: +/** + * @defgroup GlobalDefs Global enum and defines + * @{ + * + */ + +// Ignoring error-code return values from hip APIs is discouraged. On C++17, +// we can make that yield a warning +#if __cplusplus >= 201703L +#define __HIP_NODISCARD [[nodiscard]] +#else +#define __HIP_NODISCARD +#endif + +/* + * @brief hipError_t + * @enum + * @ingroup Enumerations + */ +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in +// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path. + +typedef enum __HIP_NODISCARD hipError_t { + hipSuccess = 0, ///< Successful completion. + hipErrorOutOfMemory = 2, + hipErrorNotInitialized = 3, + hipErrorDeinitialized = 4, + hipErrorProfilerDisabled = 5, + hipErrorProfilerNotInitialized = 6, + hipErrorProfilerAlreadyStarted = 7, + hipErrorProfilerAlreadyStopped = 8, + hipErrorInsufficientDriver = 35, + hipErrorInvalidImage = 200, + hipErrorInvalidContext = 201, ///< Produced when input context is invalid. 
+ hipErrorContextAlreadyCurrent = 202, + hipErrorMapFailed = 205, + hipErrorUnmapFailed = 206, + hipErrorArrayIsMapped = 207, + hipErrorAlreadyMapped = 208, + hipErrorNoBinaryForGpu = 209, + hipErrorAlreadyAcquired = 210, + hipErrorNotMapped = 211, + hipErrorNotMappedAsArray = 212, + hipErrorNotMappedAsPointer = 213, + hipErrorECCNotCorrectable = 214, + hipErrorUnsupportedLimit = 215, + hipErrorContextAlreadyInUse = 216, + hipErrorPeerAccessUnsupported = 217, + hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX + hipErrorInvalidGraphicsContext = 219, + hipErrorInvalidSource = 300, + hipErrorFileNotFound = 301, + hipErrorSharedObjectSymbolNotFound = 302, + hipErrorSharedObjectInitFailed = 303, + hipErrorOperatingSystem = 304, + hipErrorSetOnActiveProcess = 305, + hipErrorInvalidHandle = 400, + hipErrorNotFound = 500, + hipErrorIllegalAddress = 700, + hipErrorInvalidSymbol = 701, + // Runtime Error Codes start here. + hipErrorMissingConfiguration = 1001, + hipErrorMemoryAllocation = 1002, ///< Memory allocation error. + hipErrorInitializationError = 1003, ///< TODO comment from hipErrorInitializationError + hipErrorLaunchFailure = + 1004, ///< An exception occurred on the device while executing a kernel. + hipErrorPriorLaunchFailure = 1005, + hipErrorLaunchTimeOut = 1006, + hipErrorLaunchOutOfResources = 1007, ///< Out of resources error. + hipErrorInvalidDeviceFunction = 1008, + hipErrorInvalidConfiguration = 1009, + hipErrorInvalidDevice = 1010, ///< DeviceID must be in range 0...#compute-devices. + hipErrorInvalidValue = 1011, ///< One or more of the parameters passed to the API call is NULL + ///< or not in an acceptable range. + hipErrorInvalidDevicePointer = 1017, ///< Invalid Device Pointer + hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction + hipErrorUnknown = 1030, ///< Unknown error. + hipErrorInvalidResourceHandle = 1033, ///< Resource handle (hipEvent_t or hipStream_t) invalid. 
+ hipErrorNotReady = 1034, ///< Indicates that asynchronous operations enqueued earlier are not + ///< ready. This is not actually an error, but is used to distinguish + ///< from hipSuccess (which indicates completion). APIs that return + ///< this error include hipEventQuery and hipStreamQuery. + hipErrorNoDevice = 1038, ///< Call to hipGetDeviceCount returned 0 devices + hipErrorPeerAccessAlreadyEnabled = + 1050, ///< Peer access was already enabled from the current device. + + hipErrorPeerAccessNotEnabled = + 1051, ///< Peer access was never enabled from the current device. + hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen + ///< in production systems. + hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically + ///< not seen in production systems. + hipErrorHostMemoryAlreadyRegistered = + 1061, ///< Produced when trying to lock a page-locked memory. + hipErrorHostMemoryNotRegistered = + 1062, ///< Produced when trying to unlock a non-page-locked memory. + hipErrorMapBufferObjectFailed = + 1071, ///< Produced when the IPC memory attach failed from ROCr. + hipErrorAssert = + 1081, ///< Produced when the kernel calls assert. + hipErrorTbd ///< Marker that more error codes are needed. +} hipError_t; + +#undef __HIP_NODISCARD + +/* + * @brief hipDeviceAttribute_t + * @enum + * @ingroup Enumerations + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. + hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. + hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. + hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. + hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. + hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. 
+ hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in + ///< bytes. + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeWarpSize, ///< Warp size in threads. + hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a + ///< thread block. This number is shared by all thread + ///< blocks simultaneously resident on a + ///< multiprocessor. + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 + ///< cache. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per + ///< multiprocessor. + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels + ///< concurrently. + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per + ///< Multiprocessor. + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. 
+ hipDeviceAttributeIntegrated, ///< iGPU +} hipDeviceAttribute_t; + +enum hipComputeMode { + hipComputeModeDefault = 0, + hipComputeModeExclusive = 1, + hipComputeModeProhibited = 2, + hipComputeModeExclusiveProcess = 3 +}; + +/** + * @} + */ + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include "hip/hcc_detail/hip_runtime_api.h" +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include "hip/nvcc_detail/hip_runtime_api.h" +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + + +/** + * @brief: C++ wrapper for hipMalloc + * + * Perform automatic type conversion to eliminate need for excessive typecasting (ie void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMalloc + */ +#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__) +template +static inline hipError_t hipMalloc(T** devPtr, size_t size) { + return hipMalloc((void**)devPtr, size); +} + +// Provide an override to automatically typecast the pointer type from void**, and also provide a +// default for the flags. +template +static inline hipError_t hipHostMalloc(T** ptr, size_t size, + unsigned int flags = hipHostMallocDefault) { + return hipHostMalloc((void**)ptr, size, flags); +} +#endif + +#endif diff --git a/src/utils/amd_hip/hip/hip_texture_types.h b/src/utils/amd_hip/hip/hip_texture_types.h new file mode 100644 index 000000000..a7feab011 --- /dev/null +++ b/src/utils/amd_hip/hip/hip_texture_types.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + + +#endif diff --git a/src/utils/amd_hip/hip/hip_vector_types.h b/src/utils/amd_hip/hip/hip_vector_types.h new file mode 100644 index 000000000..c1a0373c0 --- /dev/null +++ b/src/utils/amd_hip/hip/hip_vector_types.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! hip_vector_types.h : Defines the HIP vector types. + +#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H + +#include + + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#if __cplusplus +#include +#endif +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/math_functions.h b/src/utils/amd_hip/hip/math_functions.h new file mode 100644 index 000000000..2dfec4551 --- /dev/null +++ b/src/utils/amd_hip/hip/math_functions.h @@ -0,0 +1,40 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H +#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +//#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h b/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h new file mode 100644 index 000000000..c3e9dc1ff --- /dev/null +++ b/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h @@ -0,0 +1,28 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H + +#include "channel_descriptor.h" + +#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h b/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h new file mode 100644 index 000000000..d0e45d26d --- /dev/null +++ b/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h @@ -0,0 +1,119 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
+#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
+
+#include "cuComplex.h"
+
+typedef cuFloatComplex hipFloatComplex;  // single-precision complex: HIP spelling of the cuComplex.h type
+
+__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); }
+
+__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); }
+
+__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
+    return make_cuFloatComplex(a, b);
+}
+
+__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); }
+
+__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
+    return cuCrealf(z) * cuCrealf(z) + cuCimagf(z) * cuCimagf(z);  // |z|^2 = re^2 + im^2, no sqrt round-trip
+}
+
+__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
+    return cuCaddf(p, q);
+}
+
+__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
+    return cuCsubf(p, q);
+}
+
+__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
+    return cuCmulf(p, q);
+}
+
+__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
+    return cuCdivf(p, q);
+}
+
+__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); }
+
+typedef cuDoubleComplex hipDoubleComplex;  // double-precision complex: HIP spelling of the cuComplex.h type
+
+__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); }
+
+__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); }
+
+__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
+    return make_cuDoubleComplex(a, b);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); }
+
+__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
+    return cuCreal(z) * cuCreal(z) + cuCimag(z) * cuCimag(z);  // |z|^2 = re^2 + im^2, no sqrt round-trip
+}
+
+__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
+    return cuCadd(p, q);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
+    return cuCsub(p, q);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
+    return cuCmul(p, q);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
+    return cuCdiv(p, q);
+}
+
+__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); }
+
+typedef cuFloatComplex hipComplex;  // same underlying type as hipFloatComplex
+
+__device__ __host__ static inline hipComplex make_Complex(float x, float y) {  // NOTE(review): lacks the "hip" prefix used by make_hipFloatComplex / make_hipDoubleComplex
+    return make_cuComplex(x, y);
+}
+
+__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
+    return cuComplexDoubleToFloat(z);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
+    return cuComplexFloatToDouble(z);
+}
+
+__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
+    return cuCfmaf(p, q, r);
+}
+
+__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
+                                                           hipDoubleComplex r) {
+    return cuCfma(p, q, r);
+}
+
+#endif
diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h
new file mode 100644
index 000000000..19d740a1e
--- /dev/null
+++ b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h
@@ -0,0 +1,126 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
+#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
+
+#include <cuda_runtime.h>
+
+#include <hip/hip_runtime_api.h>
+
+#define HIP_KERNEL_NAME(...) __VA_ARGS__
+
+typedef int hipLaunchParm;
+
+#define hipLaunchKernel(kernelName, numblocks, numthreads, memperblock, streamId, ...) \
+    do { /* legacy form: a literal 0 is passed as the first kernel argument (cf. hipLaunchParm above) */ \
+        kernelName<<<numblocks, numthreads, memperblock, streamId>>>(0, ##__VA_ARGS__); \
+    } while (0)
+
+#define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) \
+    do { /* forwards the variadic arguments unchanged to a triple-chevron launch */ \
+        kernelName<<<numblocks, numthreads, memperblock, streamId>>>(__VA_ARGS__); \
+    } while (0)
+
+#define hipReadModeElementType cudaReadModeElementType
+
+#ifdef __CUDA_ARCH__
+
+
+// 32-bit Atomics:
+#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110)
+#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110)
+#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120)
+#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120)
+#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200)
+
+// 64-bit Atomics:
+#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200)
+#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120)
+
+// Doubles
+#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120)
+
+// warp cross-lane operations:
+#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120)
+#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200)
+#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300)
+#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350)
+
+// sync
+#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200)
+#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200)
+
+// misc
+#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200)
+#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200)
+#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350)
+
+#endif
+
+#ifdef __CUDACC__
+
+
+#define hipThreadIdx_x threadIdx.x
+#define hipThreadIdx_y threadIdx.y
+#define hipThreadIdx_z threadIdx.z
+
+#define hipBlockIdx_x blockIdx.x
+#define hipBlockIdx_y blockIdx.y
+#define hipBlockIdx_z blockIdx.z
+
+#define hipBlockDim_x blockDim.x
+#define hipBlockDim_y blockDim.y
+#define hipBlockDim_z blockDim.z
+
+#define hipGridDim_x gridDim.x
+#define hipGridDim_y gridDim.y
+#define hipGridDim_z gridDim.z
+
+#define HIP_SYMBOL(X) X
+
+/**
+ * HIP_DYNAMIC_SHARED(type, var) declares `extern __shared__ type var[];` (the macro supplies the ';').
+ */
+
+#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
+
+#define HIP_DYNAMIC_SHARED_ATTRIBUTE
+
+#ifdef __HIP_DEVICE_COMPILE__
+#define abort() /* device-side abort: executes the "trap" instruction via inline asm */ \
+    { asm("trap;"); }
+#undef assert
+#define assert(COND) /* COND is parenthesized so the negation applies to the whole expression */ \
+    { \
+        if (!(COND)) { \
+            abort(); \
+        } \
+    }
+#endif
+
+#define __clock() clock()
+#define __clock64() clock64()
+
+#endif
+
+#endif
diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h
new file mode 100644
index 000000000..02c4b7ee6
--- /dev/null
+++ b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h
@@ -0,0 +1,1286 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
+#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
+
+#include <cuda_runtime_api.h>
+#include <cuda.h>
+#include <cuda_profiler_api.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+#define __dparm(x) = x  // C++: expands to a default argument value
+#else
+#define __dparm(x)  // C: no default arguments, expands to nothing
+#endif
+
+// TODO -move to include/hip_runtime_api.h as a common implementation.
+/**
+ * Memory copy types
+ * HIP-side enum; hipMemcpyKindToCudaMemcpyKind() in this header translates it to cudaMemcpyKind.
+ */
+typedef enum hipMemcpyKind {
+    hipMemcpyHostToHost,
+    hipMemcpyHostToDevice,
+    hipMemcpyDeviceToHost,
+    hipMemcpyDeviceToDevice,
+    hipMemcpyDefault
+} hipMemcpyKind;
+
+// hipTextureAddressMode: the type name and its enumerators are macro aliases of the CUDA names
+#define hipTextureAddressMode cudaTextureAddressMode
+#define hipAddressModeWrap cudaAddressModeWrap
+#define hipAddressModeClamp cudaAddressModeClamp
+#define hipAddressModeMirror cudaAddressModeMirror
+#define hipAddressModeBorder cudaAddressModeBorder
+
+// hipTextureFilterMode: macro aliases of the CUDA names
+#define hipTextureFilterMode cudaTextureFilterMode
+#define hipFilterModePoint cudaFilterModePoint
+#define hipFilterModeLinear cudaFilterModeLinear
+
+// hipTextureReadMode: macro aliases of the CUDA names
+#define hipTextureReadMode cudaTextureReadMode
+#define hipReadModeElementType cudaReadModeElementType
+#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
+
+typedef enum hipChannelFormatKind {  // HIP-side enum; see hipChannelFormatKindToCudaChannelFormatKind()
+    hipChannelFormatKindSigned = 0,
+    hipChannelFormatKindUnsigned = 1,
+    hipChannelFormatKindFloat = 2,
+    hipChannelFormatKindNone = 3
+} hipChannelFormatKind;
+
+#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
+#define hipBoundaryModeZero cudaBoundaryModeZero
+#define hipBoundaryModeTrap cudaBoundaryModeTrap
+#define hipBoundaryModeClamp cudaBoundaryModeClamp
+
+// hipResourceType: macro aliases of the CUDA names
+#define hipResourceType cudaResourceType
+#define hipResourceTypeArray cudaResourceTypeArray
+#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
+#define hipResourceTypeLinear cudaResourceTypeLinear
+#define hipResourceTypePitch2D cudaResourceTypePitch2D
+//
+// hipErrorNoDevice. NOTE(review): orphaned comment, nothing is defined under it here.
+
+
+//! Flags that can be used with hipEventCreateWithFlags:
+#define hipEventDefault cudaEventDefault
+#define hipEventBlockingSync cudaEventBlockingSync
+#define hipEventDisableTiming cudaEventDisableTiming
+#define hipEventInterprocess cudaEventInterprocess
+#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
+#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
+
+
+#define hipHostMallocDefault cudaHostAllocDefault
+#define hipHostMallocPortable cudaHostAllocPortable
+#define hipHostMallocMapped cudaHostAllocMapped
+#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
+#define hipHostMallocCoherent 0x0  // defined as 0: OR-ing it into a flags word changes nothing
+#define hipHostMallocNonCoherent 0x0  // defined as 0: OR-ing it into a flags word changes nothing
+
+#define hipHostRegisterDefault cudaHostRegisterDefault
+#define hipHostRegisterPortable cudaHostRegisterPortable
+#define hipHostRegisterMapped cudaHostRegisterMapped
+#define hipHostRegisterIoMemory cudaHostRegisterIoMemory
+
+#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
+#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
+#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
+#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
+#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
+
+// hipJitOption* names: macro aliases of the CUjit_option enumerators (CU_JIT_*)
+#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
+#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
+#define hipJitOptionWallTime CU_JIT_WALL_TIME
+#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
+#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
+#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
+#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
+#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
+#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
+#define hipJitOptionTarget CU_JIT_TARGET
+#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
+#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
+#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
+#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
+#define hipJitOptionCacheMode CU_JIT_CACHE_MODE
+#define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
+#define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
+#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
+
+typedef cudaEvent_t hipEvent_t;  // cuda*-named sources are runtime-API types, CU*-named ones driver-API types
+typedef cudaStream_t hipStream_t;
+typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
+typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
+typedef enum cudaLimit hipLimit_t;
+typedef enum cudaFuncCache hipFuncCache_t;
+typedef CUcontext hipCtx_t;
+typedef cudaSharedMemConfig hipSharedMemConfig;
+typedef CUfunc_cache hipFuncCache;  // distinct from hipFuncCache_t above, which aliases enum cudaFuncCache
+typedef CUjit_option hipJitOption;
+typedef CUdevice hipDevice_t;
+typedef CUmodule hipModule_t;
+typedef CUfunction hipFunction_t;
+typedef CUdeviceptr hipDeviceptr_t;
+typedef struct cudaArray hipArray;
+typedef struct cudaArray* hipArray_const_t;  // NOTE(review): despite the name, the pointee is not const-qualified
+typedef cudaFuncAttributes hipFuncAttributes;
+#define hipMemcpy3DParms cudaMemcpy3DParms
+#define hipArrayDefault cudaArrayDefault
+#define hipArrayLayered cudaArrayLayered
+#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
+#define hipArrayCubemap cudaArrayCubemap
+#define hipArrayTextureGather cudaArrayTextureGather
+
+typedef cudaTextureObject_t hipTextureObject_t;
+typedef cudaSurfaceObject_t hipSurfaceObject_t;
+#define hipTextureType1D cudaTextureType1D
+#define hipTextureType1DLayered cudaTextureType1DLayered
+#define hipTextureType2D cudaTextureType2D
+#define hipTextureType2DLayered cudaTextureType2DLayered
+#define hipTextureType3D cudaTextureType3D
+#define hipDeviceMapHost cudaDeviceMapHost
+
+#define hipExtent cudaExtent
+#define hipPitchedPtr cudaPitchedPtr
+#define make_hipExtent make_cudaExtent
+#define make_hipPos make_cudaPos
+#define make_hipPitchedPtr make_cudaPitchedPtr
+// Flags that can be used with hipStreamCreateWithFlags
+#define hipStreamDefault cudaStreamDefault
+#define hipStreamNonBlocking cudaStreamNonBlocking
+
+#define hipChannelFormatDesc cudaChannelFormatDesc +#define hipResourceDesc cudaResourceDesc +#define hipTextureDesc cudaTextureDesc +#define hipResourceViewDesc cudaResourceViewDesc +// adding code for hipmemSharedConfig +#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault +#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte +#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte + +inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { + switch (cuError) { + case cudaSuccess: + return hipSuccess; + case cudaErrorMemoryAllocation: + return hipErrorMemoryAllocation; + case cudaErrorLaunchOutOfResources: + return hipErrorLaunchOutOfResources; + case cudaErrorInvalidValue: + return hipErrorInvalidValue; + case cudaErrorInvalidResourceHandle: + return hipErrorInvalidResourceHandle; + case cudaErrorInvalidDevice: + return hipErrorInvalidDevice; + case cudaErrorInvalidMemcpyDirection: + return hipErrorInvalidMemcpyDirection; + case cudaErrorInvalidDevicePointer: + return hipErrorInvalidDevicePointer; + case cudaErrorInitializationError: + return hipErrorInitializationError; + case cudaErrorNoDevice: + return hipErrorNoDevice; + case cudaErrorNotReady: + return hipErrorNotReady; + case cudaErrorUnknown: + return hipErrorUnknown; + case cudaErrorPeerAccessNotEnabled: + return hipErrorPeerAccessNotEnabled; + case cudaErrorPeerAccessAlreadyEnabled: + return hipErrorPeerAccessAlreadyEnabled; + case cudaErrorHostMemoryAlreadyRegistered: + return hipErrorHostMemoryAlreadyRegistered; + case cudaErrorHostMemoryNotRegistered: + return hipErrorHostMemoryNotRegistered; + case cudaErrorUnsupportedLimit: + return hipErrorUnsupportedLimit; + default: + return hipErrorUnknown; // Note - translated error. 
+ } +} + +inline static hipError_t hipCUResultTohipError(CUresult cuError) { // TODO Populate further + switch (cuError) { + case CUDA_SUCCESS: + return hipSuccess; + case CUDA_ERROR_OUT_OF_MEMORY: + return hipErrorMemoryAllocation; + case CUDA_ERROR_INVALID_VALUE: + return hipErrorInvalidValue; + case CUDA_ERROR_INVALID_DEVICE: + return hipErrorInvalidDevice; + case CUDA_ERROR_DEINITIALIZED: + return hipErrorDeinitialized; + case CUDA_ERROR_NO_DEVICE: + return hipErrorNoDevice; + case CUDA_ERROR_INVALID_CONTEXT: + return hipErrorInvalidContext; + case CUDA_ERROR_NOT_INITIALIZED: + return hipErrorNotInitialized; + default: + return hipErrorUnknown; // Note - translated error. + } +} + +// TODO match the error enum names of hip and cuda +inline static cudaError_t hipErrorToCudaError(hipError_t hError) { + switch (hError) { + case hipSuccess: + return cudaSuccess; + case hipErrorMemoryAllocation: + return cudaErrorMemoryAllocation; + case hipErrorLaunchOutOfResources: + return cudaErrorLaunchOutOfResources; + case hipErrorInvalidValue: + return cudaErrorInvalidValue; + case hipErrorInvalidResourceHandle: + return cudaErrorInvalidResourceHandle; + case hipErrorInvalidDevice: + return cudaErrorInvalidDevice; + case hipErrorInvalidMemcpyDirection: + return cudaErrorInvalidMemcpyDirection; + case hipErrorInvalidDevicePointer: + return cudaErrorInvalidDevicePointer; + case hipErrorInitializationError: + return cudaErrorInitializationError; + case hipErrorNoDevice: + return cudaErrorNoDevice; + case hipErrorNotReady: + return cudaErrorNotReady; + case hipErrorUnknown: + return cudaErrorUnknown; + case hipErrorPeerAccessNotEnabled: + return cudaErrorPeerAccessNotEnabled; + case hipErrorPeerAccessAlreadyEnabled: + return cudaErrorPeerAccessAlreadyEnabled; + case hipErrorRuntimeMemory: + return cudaErrorUnknown; // Does not exist in CUDA + case hipErrorRuntimeOther: + return cudaErrorUnknown; // Does not exist in CUDA + case hipErrorHostMemoryAlreadyRegistered: + return 
cudaErrorHostMemoryAlreadyRegistered; + case hipErrorHostMemoryNotRegistered: + return cudaErrorHostMemoryNotRegistered; + case hipErrorTbd: + return cudaErrorUnknown; // Note - translated error. + default: + return cudaErrorUnknown; // Note - translated error. + } +} + +inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { + switch (kind) { + case hipMemcpyHostToHost: + return cudaMemcpyHostToHost; + case hipMemcpyHostToDevice: + return cudaMemcpyHostToDevice; + case hipMemcpyDeviceToHost: + return cudaMemcpyDeviceToHost; + case hipMemcpyDeviceToDevice: + return cudaMemcpyDeviceToDevice; + default: + return cudaMemcpyDefault; + } +} + +inline static cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode( + hipTextureAddressMode kind) { + switch (kind) { + case hipAddressModeWrap: + return cudaAddressModeWrap; + case hipAddressModeClamp: + return cudaAddressModeClamp; + case hipAddressModeMirror: + return cudaAddressModeMirror; + case hipAddressModeBorder: + return cudaAddressModeBorder; + default: + return cudaAddressModeWrap; + } +} + +inline static cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode( + hipTextureFilterMode kind) { + switch (kind) { + case hipFilterModePoint: + return cudaFilterModePoint; + case hipFilterModeLinear: + return cudaFilterModeLinear; + default: + return cudaFilterModePoint; + } +} + +inline static cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) { + switch (kind) { + case hipReadModeElementType: + return cudaReadModeElementType; + case hipReadModeNormalizedFloat: + return cudaReadModeNormalizedFloat; + default: + return cudaReadModeElementType; + } +} + +inline static cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind( + hipChannelFormatKind kind) { + switch (kind) { + case hipChannelFormatKindSigned: + return cudaChannelFormatKindSigned; + case hipChannelFormatKindUnsigned: + return cudaChannelFormatKindUnsigned; + case 
hipChannelFormatKindFloat: + return cudaChannelFormatKindFloat; + case hipChannelFormatKindNone: + return cudaChannelFormatKindNone; + default: + return cudaChannelFormatKindNone; + } +} + +/** + * Stream CallBack struct + */ +#define HIPRT_CB CUDART_CB +typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); +inline static hipError_t hipInit(unsigned int flags) { + return hipCUResultTohipError(cuInit(flags)); +} + +inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); } + +inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); } + +inline static hipError_t hipPeekAtLastError() { + return hipCUDAErrorTohipError(cudaPeekAtLastError()); +} + +inline static hipError_t hipMalloc(void** ptr, size_t size) { + return hipCUDAErrorTohipError(cudaMalloc(ptr, size)); +} + +inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { + return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height)); +} + +inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); +} + +inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } + +inline static hipError_t hipMallocHost(void** ptr, size_t size) + __attribute__((deprecated("use hipHostMalloc instead"))); +inline static hipError_t hipMallocHost(void** ptr, size_t size) { + return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); +} + +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) + __attribute__((deprecated("use hipHostMalloc instead"))); +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + +inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) { + return 
hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + +inline static hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc, + size_t width, size_t height, + unsigned int flags __dparm(hipArrayDefault)) { + return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags)); +} + +inline static hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags) { + return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags)); +} + +inline static hipError_t hipFreeArray(hipArray* array) { + return hipCUDAErrorTohipError(cudaFreeArray(array)); +} + +inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); +} + +inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); +} + +inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags)); +} + +inline static hipError_t hipHostUnregister(void* ptr) { + return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); +} + +inline static hipError_t hipFreeHost(void* ptr) + __attribute__((deprecated("use hipHostFree instead"))); +inline static hipError_t hipFreeHost(void* ptr) { + return hipCUDAErrorTohipError(cudaFreeHost(ptr)); +} + +inline static hipError_t hipHostFree(void* ptr) { + return hipCUDAErrorTohipError(cudaFreeHost(ptr)); +} + +inline static hipError_t hipSetDevice(int device) { + return hipCUDAErrorTohipError(cudaSetDevice(device)); +} + +inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) { + struct cudaDeviceProp cdprop; + memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp)); + cdprop.major = prop->major; + cdprop.minor = prop->minor; + 
cdprop.totalGlobalMem = prop->totalGlobalMem; + cdprop.sharedMemPerBlock = prop->sharedMemPerBlock; + cdprop.regsPerBlock = prop->regsPerBlock; + cdprop.warpSize = prop->warpSize; + cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock; + cdprop.clockRate = prop->clockRate; + cdprop.totalConstMem = prop->totalConstMem; + cdprop.multiProcessorCount = prop->multiProcessorCount; + cdprop.l2CacheSize = prop->l2CacheSize; + cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor; + cdprop.computeMode = prop->computeMode; + cdprop.canMapHostMemory = prop->canMapHostMemory; + cdprop.memoryClockRate = prop->memoryClockRate; + cdprop.memoryBusWidth = prop->memoryBusWidth; + return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop)); +} + +inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) { + return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size)); +} + +inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) { + return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size)); +} + +inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) { + return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size)); +} + +inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind copyKind) { + return hipCUDAErrorTohipError( + cudaMemcpy(dst, src, sizeBytes, 
hipMemcpyKindToCudaMemcpyKind(copyKind))); +} + + +inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError( + cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream)); +} + +inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, + hipMemcpyKindToCudaMemcpyKind(copyType))); +} + +inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, + size_t sizeBytes, size_t offset, + hipMemcpyKind copyType, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync( + symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream)); +} + +inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, + hipMemcpyKindToCudaMemcpyKind(kind))); +} + +inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync( + dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream)); +} + +inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName)); +} + +inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { + return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName)); +} + +inline static hipError_t hipMemcpy2D(void* dst, size_t 
dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind))); +} + +inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) +{ + return hipCUDAErrorTohipError(cudaMemcpy3D(p)); +} + +inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, + hipMemcpyKindToCudaMemcpyKind(kind), stream)); +} + +inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, + height, hipMemcpyKindToCudaMemcpyKind(kind))); +} + +inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t count, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind))); +} + +inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, + size_t hOffset, size_t count, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count, + hipMemcpyKindToCudaMemcpyKind(kind))); +} + +inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, + size_t count) { + return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count)); +} + +inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, + size_t count) { + return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count)); +} + +inline 
static hipError_t hipDeviceSynchronize() { + return hipCUDAErrorTohipError(cudaDeviceSynchronize()); +} + +inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) { + return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig)); +} + +inline static const char* hipGetErrorString(hipError_t error) { + return cudaGetErrorString(hipErrorToCudaError(error)); +} + +inline static const char* hipGetErrorName(hipError_t error) { + return cudaGetErrorName(hipErrorToCudaError(error)); +} + +inline static hipError_t hipGetDeviceCount(int* count) { + return hipCUDAErrorTohipError(cudaGetDeviceCount(count)); +} + +inline static hipError_t hipGetDevice(int* device) { + return hipCUDAErrorTohipError(cudaGetDevice(device)); +} + +inline static hipError_t hipIpcCloseMemHandle(void* devPtr) { + return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr)); +} + +inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) { + return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event)); +} + +inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { + return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr)); +} + +inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { + return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle)); +} + +inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags)); +} + +inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { + return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); +} + +inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); +} + +inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, 
unsigned char value, size_t sizeBytes) { + return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); +} + +inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height)); +} + +inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream)); +} + +inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){ + return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent)); +} + +inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){ + return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)); +} + +inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) { + struct cudaDeviceProp cdprop; + cudaError_t cerror; + cerror = cudaGetDeviceProperties(&cdprop, device); + strncpy(p_prop->name, cdprop.name, 256); + p_prop->totalGlobalMem = cdprop.totalGlobalMem; + p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock; + p_prop->regsPerBlock = cdprop.regsPerBlock; + p_prop->warpSize = cdprop.warpSize; + for (int i = 0; i < 3; i++) { + p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i]; + p_prop->maxGridSize[i] = cdprop.maxGridSize[i]; + } + p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; + p_prop->clockRate = cdprop.clockRate; + p_prop->totalConstMem = cdprop.totalConstMem; + p_prop->major = cdprop.major; + p_prop->minor = cdprop.minor; + p_prop->multiProcessorCount = cdprop.multiProcessorCount; + p_prop->l2CacheSize = cdprop.l2CacheSize; + p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor; + p_prop->computeMode = cdprop.computeMode; + p_prop->canMapHostMemory = 
cdprop.canMapHostMemory; + p_prop->memoryClockRate = cdprop.memoryClockRate; + p_prop->memoryBusWidth = cdprop.memoryBusWidth; + + // Same as clock-rate: + p_prop->clockInstructionRate = cdprop.clockRate; + + int ccVers = p_prop->major * 100 + p_prop->minor * 10; + + p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110); + p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110); + p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120); + p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120); + + p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200); + + p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120); + p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110); + + p_prop->arch.hasDoubles = (ccVers >= 130); + + p_prop->arch.hasWarpVote = (ccVers >= 120); + p_prop->arch.hasWarpBallot = (ccVers >= 200); + p_prop->arch.hasWarpShuffle = (ccVers >= 300); + p_prop->arch.hasFunnelShift = (ccVers >= 350); + + p_prop->arch.hasThreadFenceSystem = (ccVers >= 200); + p_prop->arch.hasSyncThreadsExt = (ccVers >= 200); + + p_prop->arch.hasSurfaceFuncs = (ccVers >= 200); + p_prop->arch.has3dGrid = (ccVers >= 200); + p_prop->arch.hasDynamicParallelism = (ccVers >= 350); + + p_prop->concurrentKernels = cdprop.concurrentKernels; + p_prop->pciBusID = cdprop.pciBusID; + p_prop->pciDeviceID = cdprop.pciDeviceID; + p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor; + p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard; + + return hipCUDAErrorTohipError(cerror); +} + +inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + enum cudaDeviceAttr cdattr; + cudaError_t cerror; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + cdattr = cudaDevAttrMaxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + cdattr = cudaDevAttrMaxBlockDimX; + break; + case hipDeviceAttributeMaxBlockDimY: + cdattr = cudaDevAttrMaxBlockDimY; + break; + case hipDeviceAttributeMaxBlockDimZ: + cdattr = cudaDevAttrMaxBlockDimZ; + 
break; + case hipDeviceAttributeMaxGridDimX: + cdattr = cudaDevAttrMaxGridDimX; + break; + case hipDeviceAttributeMaxGridDimY: + cdattr = cudaDevAttrMaxGridDimY; + break; + case hipDeviceAttributeMaxGridDimZ: + cdattr = cudaDevAttrMaxGridDimZ; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + cdattr = cudaDevAttrMaxSharedMemoryPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + cdattr = cudaDevAttrTotalConstantMemory; + break; + case hipDeviceAttributeWarpSize: + cdattr = cudaDevAttrWarpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + cdattr = cudaDevAttrMaxRegistersPerBlock; + break; + case hipDeviceAttributeClockRate: + cdattr = cudaDevAttrClockRate; + break; + case hipDeviceAttributeMemoryClockRate: + cdattr = cudaDevAttrMemoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + cdattr = cudaDevAttrGlobalMemoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + cdattr = cudaDevAttrMultiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + cdattr = cudaDevAttrComputeMode; + break; + case hipDeviceAttributeL2CacheSize: + cdattr = cudaDevAttrL2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + cdattr = cudaDevAttrComputeCapabilityMajor; + break; + case hipDeviceAttributeComputeCapabilityMinor: + cdattr = cudaDevAttrComputeCapabilityMinor; + break; + case hipDeviceAttributeConcurrentKernels: + cdattr = cudaDevAttrConcurrentKernels; + break; + case hipDeviceAttributePciBusId: + cdattr = cudaDevAttrPciBusId; + break; + case hipDeviceAttributePciDeviceId: + cdattr = cudaDevAttrPciDeviceId; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + cdattr = cudaDevAttrIsMultiGpuBoard; + break; + case hipDeviceAttributeIntegrated: + cdattr = 
cudaDevAttrIntegrated; + break; + default: + cerror = cudaErrorInvalidValue; + break; + } + + cerror = cudaDeviceGetAttribute(pi, cdattr, device); + + return hipCUDAErrorTohipError(cerror); +} + +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* func, + int blockSize, + size_t dynamicSMemSize) { + cudaError_t cerror; + cerror = + cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); + return hipCUDAErrorTohipError(cerror); +} + +inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, void* ptr) { + struct cudaPointerAttributes cPA; + hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); + if (err == hipSuccess) { + switch (cPA.memoryType) { + case cudaMemoryTypeDevice: + attributes->memoryType = hipMemoryTypeDevice; + break; + case cudaMemoryTypeHost: + attributes->memoryType = hipMemoryTypeHost; + break; + default: + return hipErrorUnknown; + } + attributes->device = cPA.device; + attributes->devicePointer = cPA.devicePointer; + attributes->hostPointer = cPA.hostPointer; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + } + return err; +} + + +inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) { + return hipCUDAErrorTohipError(cudaMemGetInfo(free, total)); +} + +inline static hipError_t hipEventCreate(hipEvent_t* event) { + return hipCUDAErrorTohipError(cudaEventCreate(event)); +} + +inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) { + return hipCUDAErrorTohipError(cudaEventRecord(event, stream)); +} + +inline static hipError_t hipEventSynchronize(hipEvent_t event) { + return hipCUDAErrorTohipError(cudaEventSynchronize(event)); +} + +inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { + return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop)); +} + +inline static hipError_t hipEventDestroy(hipEvent_t 
event) { + return hipCUDAErrorTohipError(cudaEventDestroy(event)); +} + + +inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { + return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags)); +} + +inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority)); +} + +inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority)); +} + +inline static hipError_t hipStreamCreate(hipStream_t* stream) { + return hipCUDAErrorTohipError(cudaStreamCreate(stream)); +} + +inline static hipError_t hipStreamSynchronize(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); +} + +inline static hipError_t hipStreamDestroy(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); +} + +inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { + return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags)); +} + +inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) { + return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority)); +} + +inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags)); +} + +inline static hipError_t hipStreamQuery(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamQuery(stream)); +} + +inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags) { + return hipCUDAErrorTohipError( + cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags)); +} + +inline static hipError_t 
hipDriverGetVersion(int* driverVersion) { + cudaError_t err = cudaDriverGetVersion(driverVersion); + + // Override driver version to match version reported on HCC side. + *driverVersion = 4; + + return hipCUDAErrorTohipError(err); +} + +inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) { + return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion)); +} + +inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) { + return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice)); +} + +inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) { + return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice)); +} + +inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { + return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags)); +} + +inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx)); +} + +inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags)); +} + +inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, + int* active) { + return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active)); +} + +inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev)); +} + +inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev)); +} + +inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev)); +} + +inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, 
flags)); +} + +inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, + hipDeviceptr_t dptr) { + return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr)); +} + +inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t count) { + return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)); +} + +inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, + int srcDevice, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError( + cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)); +} + +// Profile APIs: +inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); } + +inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } + +inline static hipError_t hipSetDeviceFlags(unsigned int flags) { + return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags)); +} + +inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) { + return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags)); +} + +inline static hipError_t hipEventQuery(hipEvent_t event) { + return hipCUDAErrorTohipError(cudaEventQuery(event)); +} + +inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) { + return hipCUResultTohipError(cuCtxCreate(ctx, flags, device)); +} + +inline static hipError_t hipCtxDestroy(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxDestroy(ctx)); +} + +inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + return hipCUResultTohipError(cuCtxPopCurrent(ctx)); +} + +inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxPushCurrent(ctx)); +} + +inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxSetCurrent(ctx)); +} + +inline static hipError_t 
hipCtxGetCurrent(hipCtx_t* ctx) { + return hipCUResultTohipError(cuCtxGetCurrent(ctx)); +} + +inline static hipError_t hipCtxGetDevice(hipDevice_t* device) { + return hipCUResultTohipError(cuCtxGetDevice(device)); +} + +inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion)); +} + +inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) { + return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig)); +} + +inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) { + return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig)); +} + +inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config)); +} + +inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig)); +} + +inline static hipError_t hipCtxSynchronize(void) { + return hipCUResultTohipError(cuCtxSynchronize()); +} + +inline static hipError_t hipCtxGetFlags(unsigned int* flags) { + return hipCUResultTohipError(cuCtxGetFlags(flags)); +} + +inline static hipError_t hipCtxDetach(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxDetach(ctx)); +} + +inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) { + return hipCUResultTohipError(cuDeviceGet(device, ordinal)); +} + +inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device)); +} + +inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceGetName(name, len, device)); +} + +inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) { + return 
hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device)); +} + +inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) { + return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId)); +} + +inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) { + return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config)); +} + +inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { + return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config)); +} + +inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { + return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit)); +} + +inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceTotalMem(bytes, device)); +} + +inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { + return hipCUResultTohipError(cuModuleLoad(module, fname)); +} + +inline static hipError_t hipModuleUnload(hipModule_t hmod) { + return hipCUResultTohipError(cuModuleUnload(hmod)); +} + +inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, + const char* kname) { + return hipCUResultTohipError(cuModuleGetFunction(function, module, kname)); +} + +inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func)); +} + +inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name) { + return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name)); +} + +inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { + return hipCUResultTohipError(cuModuleLoadData(module, image)); +} + +inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, + unsigned int numOptions, hipJitOption* options, + 
void** optionValues) { + return hipCUResultTohipError( + cuModuleLoadDataEx(module, image, numOptions, options, optionValues)); +} + +inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams, + void** extra) { + return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, stream, + kernelParams, extra)); +} + + +inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { + return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig)); +} + +#ifdef __cplusplus +} +#endif + +#ifdef __CUDACC__ + +template +inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, + size_t dynamicSMemSize = 0, + int blockSizeLimit = 0, + unsigned int flags = 0) { + cudaError_t cerror; + cerror = cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, dynamicSMemSize, + blockSizeLimit, flags); + return hipCUDAErrorTohipError(cerror); +} + +template +inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size)); +} + +template +inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, + size_t size = UINT_MAX) { + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); +} + +template +inline static hipError_t hipUnbindTexture(struct texture* tex) { + return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); +} + +inline static hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size = 
UINT_MAX){ + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); +} + +template +inline static hipError_t hipBindTextureToArray(struct texture& tex, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); +} + +template +inline static hipError_t hipBindTextureToArray(struct texture *tex, + hipArray_const_t array, + const struct hipChannelFormatDesc* desc) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); +} + +template +inline static hipError_t hipBindTextureToArray(struct texture& tex, + hipArray_const_t array) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); +} + +template +inline static hipChannelFormatDesc hipCreateChannelDesc() { + return cudaCreateChannelDesc(); +} + +inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + hipChannelFormatKind f) { + return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f)); +} + +inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + return hipCUDAErrorTohipError( + cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + +inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { + return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject)); +} + +inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc)); +} + +inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject)); +} + +inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + 
hipTextureObject_t textureObject) { + return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); +} + +inline static hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref) +{ + return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); +} + +inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) +{ + return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); +} +#endif //__CUDACC__ + +#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h b/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h new file mode 100644 index 000000000..751dd8e4d --- /dev/null +++ b/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h @@ -0,0 +1,6 @@ +#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/src/utils/amd_hip/hip/texture_types.h b/src/utils/amd_hip/hip/texture_types.h new file mode 100644 index 000000000..7d785708d --- /dev/null +++ b/src/utils/amd_hip/hip/texture_types.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) +#include "texture_types.h" +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif + +#endif From 8e2c26d15e534e13c8a345b6fbc8bb6c20a481e8 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Wed, 20 Feb 2019 11:10:21 +0100 Subject: [PATCH 07/28] Added ROCM compilation to CMake --- CMakeLists.txt | 55 ++++- cmake/modules/FindROCM.cmake | 427 +++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 17 ++ 3 files changed, 494 insertions(+), 5 deletions(-) create mode 100644 cmake/modules/FindROCM.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 8aa16e1d5..7cd10dd4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,9 @@ cmake_minimum_required(VERSION 3.6) # Intel C++11 support starts from 3.6 or even later version project(SIRIUS) +# allow {module}_ROOT variables to be set +cmake_policy(SET CMP0074 NEW) + # set language and standard enable_language(CXX Fortran) set(CMAKE_CXX_STANDARD 11) @@ -12,6 +15,7 @@ set(BUILD_DOCS OFF CACHE BOOL "build doxygen doc") set(USE_ELPA OFF CACHE BOOL "use scalapack") set(USE_MAGMA OFF CACHE BOOL "use MAGMA") set(USE_CUDA OFF CACHE BOOL "use CUDA") +set(USE_ROCM OFF CACHE BOOL "use ROCM AMD GPU code") set(USE_MKL OFF CACHE BOOL "use Intel MKL") set(USE_CRAY_LIBSCI OFF 
CACHE BOOL "use LAPACK/SCALAPACK from Cray LIBSCI") set(USE_SCALAPACK OFF CACHE BOOL "use scalapack") @@ -108,6 +112,9 @@ if(USE_CUDA) #list(APPEND CMAKE_CUDA_FLAGS "-arch=sm_61") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -arch=sm_61") endif() + # tell HIP to call cuda api + add_definitions("-D__HIP_PLATFORM_NVCC__") + include_directories(BEFORE ${PROJECT_SOURCE_DIR}/src/utils/amd_hip) endif(USE_CUDA) if(USE_MAGMA) @@ -120,6 +127,35 @@ if(USE_MAGMA) set(SYSTEM_LIBRARIES "${SYSTEM_LIBRARIES};${MAGMA_LIBRARIES}") endif(USE_MAGMA) +if(USE_CUDA AND USE_ROCM) + message(FATAL_ERROR "USE_CUDA and USE_ROCM must not be enabled at the same time!") +endif() + +if(USE_ROCM) + # TODO: prototype feature. ROCM is REQUIRED here, and the platform test quotes both operands so an empty ROCM_HIP_PLATFORM cannot produce a malformed if(). + message(STATUS "WARNING: ROCM enabled, prototype feature! Only limited functionality available.") + find_package(ROCM REQUIRED COMPONENTS rocfft) + if(NOT "${ROCM_HIP_PLATFORM}" STREQUAL "hcc") + message(FATAL_ERROR "Compilation on Nvidia platform not supported with ROCM enabled!") + endif() + add_definitions("-D__GPU") + add_definitions("-D__ROCM") + add_definitions(${ROCM_DEFINITIONS}) + include_directories(${ROCM_INCLUDE_DIRS}) + + # set compile flags to use with HIPCC compiler + set(HIPCC_COMPILE_FLAGS) + set(BUILD_TYPES DEBUG RELEASE RELWITHDEBINFO) + if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE_UPPER) + foreach(build_type IN LISTS BUILD_TYPES) + if("${build_type}" STREQUAL "${BUILD_TYPE_UPPER}") + list(APPEND HIPCC_COMPILE_FLAGS ${CMAKE_CXX_FLAGS_${BUILD_TYPE_UPPER}}) + endif() + endforeach() + endif() +endif() + # add required libraries set(SYSTEM_LIBRARIES "${SYSTEM_LIBRARIES};${MPI_CXX_LIBRARIES};${GSL_LIBRARY};${LIBXC_LIBRARIES};${LIBSPG_LIBRARIES}") @@ -193,14 +229,22 @@ MACRO(SIRIUS_SETUP_TARGET _target) if(USE_MKL) # TODO: handle -lpthread properly if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - target_link_libraries(${_target} PRIVATE "${sirius_cu_location};${SYSTEM_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES};-Wl,--no-as-needed;${MKL_LIBRARIES};-lpthread -lm -ldl
-lgomp") + target_link_libraries(${_target} PRIVATE "${SYSTEM_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES};-Wl,--no-as-needed;${MKL_LIBRARIES};-lpthread -lm -ldl -lgomp") elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - target_link_libraries(${_target} PRIVATE "${sirius_cu_location};${SYSTEM_LIBRARIES};${MKL_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES};-lpthread") + target_link_libraries(${_target} PRIVATE "${SYSTEM_LIBRARIES};${MKL_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES};-lpthread") else() message(FATAL_ERROR "Unsupported compiler") endif() else() - target_link_libraries(${_target} PRIVATE "${sirius_cu_location};${SYSTEM_LIBRARIES};${LAPACK_LIBRARIES};${FFTW_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES}") + target_link_libraries(${_target} PRIVATE "${SYSTEM_LIBRARIES};${LAPACK_LIBRARIES};${FFTW_LIBRARIES};${HDF5_C_LIBRARIES};${HDF5_C_HL_LIBRARIES}") + endif() + + if(USE_CUDA) + target_link_libraries(${_target} PRIVATE "${sirius_cu_location}") + endif() + + if(USE_ROCM) + target_link_libraries(${_target} PRIVATE sirius_rocm sirius_cpp ${ROCM_LIBRARIES}) endif() ENDMACRO() @@ -211,12 +255,13 @@ if(USE_CUDA) endif() # applications -add_subdirectory(apps/atoms) -add_subdirectory(apps/dft_loop) if(BUILD_TESTS) add_subdirectory(apps/tests) add_subdirectory(apps/unit_tests) endif(BUILD_TESTS) + +add_subdirectory(apps/atoms) +add_subdirectory(apps/dft_loop) add_subdirectory(apps/utils) add_subdirectory(python_module) add_subdirectory(doc) diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake new file mode 100644 index 000000000..df7efcd6e --- /dev/null +++ b/cmake/modules/FindROCM.cmake @@ -0,0 +1,427 @@ +# - Find the ROCM library +# +# Usage: +# find_package(ROCM [REQUIRED] [QUIET] COMPONENTS [components ...] 
) +# +# Components available: +# - hipblas +# - hipsparse +# - rocfft +# - rocblas +# - rocsparse +# +# Commands made available: +# rocm_hip_add_library( [STATIC | SHARED] [FLAGS] [OUTPUT_DIR] [INCLUDE_DIRS] ) +# --- Compiles source files into an imported library with hipcc. No global definitions or include directories are taken into account. +# +# The following variables can be set for compilation: +# ROCM_HIPCC_FLAGS ----------------- Flags passed on to hipcc compiler +# ROCM_HIPCC_FLAGS_DEBUG ----------- Flags passed on to hipcc compiler in DEBUG mode +# ROCM_HIPCC_FLAGS_RELEASE --------- Flags passed on to hipcc compiler in RELEASE mode +# ROCM_HIPCC_FLAGS_RELWITHDEBINFO -- Flags passed on to hipcc compiler in RELWITHDEBINFO mode +# ROCM_HIPCC_FLAGS_MINSIZEREL ------ Flags passed on to hipcc compiler in MINSIZEREL mode +# +# The following variables can be set to specify a search location +# ROCM_ROOT ------------ if set, the libraries are exclusively searched under this path +# <COMPONENT>_ROOT ------ if set, search for component specific libraries at given path.
Takes precedence over ROCM_ROOT +# +# The following variables are generated: +# ROCM_FOUND ------------------- true if ROCM is found on the system +# ROCM_LIBRARIES --------------- full paths to the ROCM libraries +# ROCM_INCLUDE_DIRS ------------ ROCM include directories +# ROCM_DEFINITIONS ------------- ROCM definitions +# ROCM_HCC_EXECUTABLE ---------- ROCM HCC compiler +# ROCM_HCC-CONFIG_EXECUTABLE --- ROCM HCC config +# ROCM_HIPCC_EXECUTABLE -------- HIPCC compiler +# ROCM_HIPCONFIG_EXECUTABLE ---- hip config +# ROCM_HIPIFY-PERL_EXECUTABLE -- hipify +# ROCM_HIP_PLATFORM ------------ Platform identifier: "hcc" or "nvcc" +# + + +set(ROCM_HIPCC_FLAGS "" CACHE STRING "Flags for HIPCC Compiler") +set(ROCM_HIPCC_FLAGS_DEBUG "-g" CACHE STRING "Debug flags for HIPCC Compiler") +set(ROCM_HIPCC_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "Release flags for HIPCC Compiler") +set(ROCM_HIPCC_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG" CACHE STRING "Release with debug flags for HIPCC Compiler") +set(ROCM_HIPCC_FLAGS_MINSIZEREL "-Os -DNDEBUG" CACHE STRING "Minimum size flags for HIPCC Compiler") + +# Use the ROCM_ROOT environment variable if no CMake variable is set; a bare ENV{...} operand in if() is always false, so compare the value explicitly +if(NOT ROCM_ROOT AND NOT "$ENV{ROCM_ROOT}" STREQUAL "") + file(TO_CMAKE_PATH "$ENV{ROCM_ROOT}" ROCM_ROOT) + set(ROCM_ROOT "${ROCM_ROOT}" CACHE PATH "Root directory for ROCM installation.") +endif() + +set(ROCM_FOUND FALSE) +set(ROCM_LIBRARIES) +set(ROCM_INCLUDE_DIRS) +set(ROCM_DEFINITIONS) +unset(ROCM_HCC_EXECUTABLE) +unset(ROCM_HCC-CONFIG_EXECUTABLE) +unset(ROCM_HIPCC_EXECUTABLE) +unset(ROCM_HIPCONFIG_EXECUTABLE) +unset(ROCM_HIPIFY-PERL_EXECUTABLE) +unset(ROCM_HIP_PLATFORM) + +include(FindPackageHandleStandardArgs) + + +# Finds libraries and include path for rocm modules +# IN: +# - module_name: name of a module (e.g. 
hcc) +# - following arguments: name of libraries required +# OUT: +# - ROCM_LIBRARIES: Appends to list of libraries +# - ROCM_INCLUDE_DIRS: Appends to include dirs +function(find_rcm_module module_name) + # convert module name to upper case for consistent variable naming + string(TOUPPER ${module_name} MODULE_NAME_UPPER) + + + if(DEFINED ${MODULE_NAME_UPPER}_ROOT) + set(ROOT_DIR ${${MODULE_NAME_UPPER}_ROOT}) + elseif(DEFINED ROCM_ROOT) + set(ROOT_DIR ${ROCM_ROOT}) + endif() + + # get abosolute path to avoid issues with tilde + if(ROOT_DIR) + get_filename_component(ROOT_DIR ${ROOT_DIR} ABSOLUTE) + endif() + + # remove module name from input arguments + set(LIBRARY_NAMES ${ARGV}) + list(REMOVE_AT LIBRARY_NAMES 0) + + if(${ROCM_FIND_REQUIRED}) + set(ROCM_${MODULE_NAME_UPPER}_FIND_REQUIRED TRUE) + else() + set(ROCM_${MODULE_NAME_UPPER}_FIND_REQUIRED FALSE) + endif() + if(${ROCM_FIND_QUIETLY}) + set(ROCM_${MODULE_NAME_UPPER}_FIND_QUIETLY TRUE) + else() + set(ROCM_${MODULE_NAME_UPPER}_FIND_QUIETLY FALSE) + endif() + + set(ROCM_LIBRARIES_${MODULE_NAME_UPPER}) + + if(ROOT_DIR) + # find libraries + foreach(library_name IN LISTS LIBRARY_NAMES) + find_library( + ROCM_LIBRARIES_${library_name} + NAMES ${library_name} + PATHS ${ROOT_DIR} + PATH_SUFFIXES "lib" "${module_name}/lib" + NO_DEFAULT_PATH + ) + find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE + "For ROCM module ${module_name}, library ${library_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." + REQUIRED_VARS ROCM_LIBRARIES_${library_name}) + if(ROCM_LIBRARIES_${library_name}) + list(APPEND ROCM_LIBRARIES_${MODULE_NAME_UPPER} ${ROCM_LIBRARIES_${library_name}}) + mark_as_advanced(ROCM_LIBRARIES_${library_name}) + endif() + endforeach() + + # find include directory + find_path( + ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} + NAMES ${module_name}/include + PATHS ${ROOT_DIR} ${ROOT_DIR}/.. 
+ NO_DEFAULT_PATH + ) + # set include directory for module if found + if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + set(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}}/${module_name}/include) + endif() + + else() + + foreach(library_name IN LISTS LIBRARY_NAMES) + find_library( + ROCM_LIBRARIES_${library_name} + NAMES ${library_name} + PATHS /opt/rocm + PATH_SUFFIXES "lib" "lib64" "${module_name}/lib" "rocm/${module_name}/lib" + ) + find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE + "For ROCM module ${module_name}, library ${library_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." + REQUIRED_VARS ROCM_LIBRARIES_${library_name}) + if(ROCM_LIBRARIES_${library_name}) + list(APPEND ROCM_LIBRARIES_${MODULE_NAME_UPPER} ${ROCM_LIBRARIES_${library_name}}) + mark_as_advanced(ROCM_LIBRARIES_${library_name}) + endif() + endforeach() + + # find include directory + find_path( + ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} + NAMES ${module_name}/include + PATHS /opt/rocm/ + ) + # set include directory for module if found + if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + set(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}}/${module_name}/include) + endif() + endif() + + + # check if all required parts found + find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE + "ROCM module ${module_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." 
+ REQUIRED_VARS ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + mark_as_advanced(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + endif() + + # set global variables + if(ROCM_LIBRARIES_${MODULE_NAME_UPPER}) + set(ROCM_LIBRARIES ${ROCM_LIBRARIES} ${ROCM_LIBRARIES_${MODULE_NAME_UPPER}} PARENT_SCOPE) + endif() + if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) + set(ROCM_INCLUDE_DIRS ${ROCM_INCLUDE_DIRS} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}} PARENT_SCOPE) + endif() + +endfunction() + + +# Finds executables of rocm modules +# IN: +# - module_name: name of a module (e.g. hcc) +# - executable_name: name of the executable (e.g. hcc) +# OUT: +# - ROCM_${executable_name}_EXECUTABLE: Path to executable +function(find_rocm_executable module_name executable_name) + string(TOUPPER ${module_name} MODULE_NAME_UPPER) + string(TOUPPER ${executable_name} EXECUTABLE_NAME_UPPER) + unset(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE PARENT_SCOPE) + + if(DEFINED ${MODULE_NAME_UPPER}_ROOT) + set(ROOT_DIR ${${MODULE_NAME_UPPER}_ROOT}) + elseif(DEFINED ROCM_ROOT) + set(ROOT_DIR ${ROCM_ROOT}) + endif() + + # get abosolute path to avoid issues with tilde + if(ROOT_DIR) + get_filename_component(ROOT_DIR ${ROOT_DIR} ABSOLUTE) + endif() + + if(ROOT_DIR) + find_file( + ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE + NAMES ${executable_name} + PATHS ${ROOT_DIR} + PATH_SUFFIXES "bin" "${module_name}/bin" + NO_DEFAULT_PATH + ) + else() + find_file( + ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE + NAMES ${executable_name} + PATHS "/opt/rocm" + PATH_SUFFIXES "bin" "${module_name}/bin" + ) + endif() + set(ROCM_${EXECUTABLE_NAME_UPPER} ROCM_${EXECUTABLE_NAME_UPPER} PARENT_SCOPE) + + if(${ROCM_FIND_REQUIRED}) + set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_REQUIRED TRUE) + else() + set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_REQUIRED FALSE) + endif() + if(${ROCM_FIND_QUIETLY}) + set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_QUIETLY TRUE) + 
else() + set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_QUIETLY FALSE) + endif() + find_package_handle_standard_args(ROCM FAIL_MESSAGE + "ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER} ${executable_name} executable could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." + REQUIRED_VARS ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) + if(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) + set(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE ${ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE} PARENT_SCOPE) + mark_as_advanced(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) + endif() +endfunction() + + + +# find compilers +find_rocm_executable(hcc hcc) +find_rocm_executable(hip hipcc) + +if(ROCM_HIPCC_EXECUTABLE AND ROCM_HCC_EXECUTABLE) + set(ROCM_FOUND TRUE) +else() + set(ROCM_FOUND FALSE) + return() +endif() + + +# find other executables and libraries +find_rocm_executable(hcc hcc-config) +find_rocm_executable(hip hipconfig) +find_rocm_executable(hip hipify-perl) +find_rcm_module(hcc LTO OptRemarks mcwamp mcwamp_cpu mcwamp_hsa hc_am) +find_rcm_module(hip hip_hcc) +find_rcm_module(rocm hsa-runtime64) + + +# parse hip config +execute_process(COMMAND ${ROCM_HIPCONFIG_EXECUTABLE} -P OUTPUT_VARIABLE ROCM_HIP_PLATFORM RESULT_VARIABLE RESULT_VALUE) +if(NOT ${RESULT_VALUE} EQUAL 0) + message(FATAL_ERROR "Error parsing platform identifier from hipconfig! Code: ${RESULT_VALUE}") +endif() +if(NOT ROCM_HIP_PLATFORM) + message(FATAL_ERROR "Empty platform identifier from hipconfig!") +endif() + +# set definitions +if("${ROCM_HIP_PLATFORM}" STREQUAL "hcc") + set(ROCM_DEFINITIONS -D__HIP_PLATFORM_HCC__) +elseif("${ROCM_HIP_PLATFORM}" STREQUAL "nvcc") + set(ROCM_DEFINITIONS -D__HIP_PLATFORM_NVCC__) +else() + message(FATAL_ERROR "Could not parse platform identifier from hipconfig! 
Value: ${ROCM_HIP_PLATFORM}") +endif() + +# find libraries for each specified components +foreach(module_name IN LISTS ROCM_FIND_COMPONENTS) + # set required libaries for each module + if("${module_name}" STREQUAL "hipblas") + find_rcm_module(hipblas hipblas) + elseif("${module_name}" STREQUAL "hipsparse") + find_rcm_module(hipsparse hipsparse) + elseif("${module_name}" STREQUAL "rocblas") + find_rcm_module(rocblas rocblas) + elseif("${module_name}" STREQUAL "rocsparse") + find_rcm_module(rocsparse rocsparse) + elseif("${module_name}" STREQUAL "rocfft") + find_rcm_module(rocfft rocfft rocfft-device) + else() + message(FATAL_ERROR "Unrecognized component \"${module_name}\" in FindROCM module!") + endif() +endforeach() + + +# Generates library compiled with hipcc +# Usage: +# rocm_hip_add_library( [STATIC | SHARED] [FLAGS] [OUTPUT_DIR] [INCLUDE_DIRS] ) +macro(rocm_hip_add_library) + cmake_parse_arguments( + HIP_LIB + "SHARED;STATIC" + "OUTPUT_DIR" + "FLAGS;INCLUDE_DIRS" + ${ARGN} + ) + # allow either STATIC or SHARED + if(HIP_LIB_SHARED AND HIP_LIB_STATIC) + message(FATAL_ERROR "rocm_hip_add_library: library cannot by both static and shared!") + endif() + + # default to SHARED + if(NOT (HIP_LIB_SHARED OR HIP_LIB_STATIC)) + set(HIP_LIB_SHARED TRUE) + endif() + + # default to current binary output directory + if(NOT HIP_LIB_OUTPUT_DIR) + set(HIP_LIB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + # parse positional arguments + list(LENGTH HIP_LIB_UNPARSED_ARGUMENTS NARGS) + if(${NARGS} LESS 2) + message(FATAL_ERROR "rocm_hip_add_library: Not enough arguments!") + endif() + list(GET HIP_LIB_UNPARSED_ARGUMENTS 0 HIP_LIB_NAME) + list(REMOVE_AT HIP_LIB_UNPARSED_ARGUMENTS 0) + set(HIP_LIB_SOURCES ${HIP_LIB_UNPARSED_ARGUMENTS}) + + # generate include flags + set(_ROCM_FULL_PATH_INCLUDE_FLAGS) + foreach(_rocm_iternal_dir IN LISTS HIP_LIB_INCLUDE_DIRS) + if(NOT IS_ABSOLUTE ${_rocm_iternal_dir}) + get_filename_component(_rocm_iternal_dir ${_rocm_iternal_dir} ABSOLUTE) + 
endif() + list(APPEND _ROCM_FULL_PATH_INCLUDE_FLAGS -I${_rocm_iternal_dir}) + endforeach() + + # generate full path to source files + unset(_ROCM_SOURCES) + foreach(source IN LISTS HIP_LIB_SOURCES) + if(NOT IS_ABSOLUTE ${source}) + get_filename_component(source ${source} ABSOLUTE) + endif() + set(_ROCM_SOURCES ${_ROCM_SOURCES} ${source}) + endforeach() + + # generate flags to use + set(_ROCM_STD_FLAGS ${HIP_LIB_FLAGS} ${ROCM_HIPCC_FLAGS}) + list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX ^-std=) + set(_ROCM_FLAGS) + if(CMAKE_CXX_STANDARD AND NOT _ROCM_STD_FLAGS) + list(APPEND _ROCM_FLAGS -std=c++${CMAKE_CXX_STANDARD}) + endif() + if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} _ROCM_BUILD_TYPE_UPPER) + list(APPEND _ROCM_FLAGS ${ROCM_HIPCC_FLAGS_${_ROCM_BUILD_TYPE_UPPER}}) + endif() + + if(NOT ROCM_HIPCC_EXECUTABLE) + message(FATAL_ERROR "HIPCC executable not found!") + endif() + + # create imported shared library + if(HIP_LIB_SHARED) + set(_ROCM_FLAGS ${_ROCM_FLAGS} -fPIC) + add_library(${HIP_LIB_NAME} SHARED IMPORTED GLOBAL) + set_target_properties(${HIP_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so) + endif() + + # compile all files to .o + set(_ROCM_OBJS) + set(_ROCM_OBJ_TARGETS) + foreach(_rocm_file IN LISTS _ROCM_SOURCES) + get_filename_component(_ROCM_FILE_NAME_ONLY ${_rocm_file} NAME) + set(_ROCM_OBJ_FILE ${HIP_LIB_OUTPUT_DIR}/${_ROCM_FILE_NAME_ONLY}.o) + list(APPEND _ROCM_OBJS ${_ROCM_OBJ_FILE}) + list(APPEND _ROCM_OBJ_TARGETS HIP_TARGET_${_ROCM_FILE_NAME_ONLY}) + add_custom_target(HIP_TARGET_${_ROCM_FILE_NAME_ONLY} COMMAND ${ROCM_HIPCC_EXECUTABLE} -c ${_rocm_file} -o ${_ROCM_OBJ_FILE} ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} + WORKING_DIRECTORY ${HIP_LIB_OUTPUT_DIR} SOURCES ${_rocm_file}) + + endforeach() + + # compile shared library + if(HIP_LIB_SHARED) + add_custom_target(HIP_TARGET_${HIP_LIB_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so 
${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} + WORKING_DIRECTORY ${HIP_LIB_OUTPUT_DIR}) + + # add depencies + add_dependencies(${HIP_LIB_NAME} HIP_TARGET_${HIP_LIB_NAME}) + foreach(_rocm_target IN LISTS _ROCM_OBJ_TARGETS) + add_dependencies(HIP_TARGET_${HIP_LIB_NAME} ${_rocm_target}) + endforeach() + endif() + + # static library + if(HIP_LIB_STATIC) + # create library from object files + add_library(${HIP_LIB_NAME} ${_ROCM_OBJS}) + set_target_properties(${HIP_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) + set_source_files_properties( + ${_ROCM_OBJS} + PROPERTIES + EXTERNAL_OBJECT true + GENERATED true + ) + # add dependencies + foreach(_rocm_target IN LISTS _ROCM_OBJ_TARGETS) + add_dependencies(${HIP_LIB_NAME} ${_rocm_target}) + endforeach() + endif() + +endmacro() + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 64c838b11..da70a36a3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,5 +23,22 @@ if(CREATE_FORTRAN_BINDINGS) ) endif() +if(USE_ROCM) + add_library(sirius_cpp STATIC ./SDDK/GPU/rocfft_interface.cpp) + set_target_properties(sirius_cpp PROPERTIES POSITION_INDEPENDENT_CODE ON) + INSTALL (TARGETS sirius_cpp ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/) + + # use include directories and definitions of sirius_cpp (using custom targets not possible with generators) + set(DEFINITIONS_PROP "$") + set(DEFINITIONS_GENERATOR "$<$:-D$>") + set(INCLUDE_DIR_PROP "$") + set(INCLUDE_DIR_GENERATOR "$<$:-I$>") + + # create gpu library compiled with hip + file(GLOB_RECURSE HIPFILES_KERNELS "Kernels/*.cu") + rocm_hip_add_library(sirius_rocm SHARED ./SDDK/GPU/fft_kernels.cu ${HIPFILES_KERNELS} + FLAGS ${DEFINITIONS_GENERATOR} ${INCLUDE_DIR_GENERATOR} "-Wno-macro-redefined") +endif() + install(DIRECTORY ./ DESTINATION "${CMAKE_INSTALL_PREFIX}/include/sirius" FILES_MATCHING REGEX ".*(hpp|h)$") install(FILES ${PROJECT_BINARY_DIR}/src/version.hpp DESTINATION "${CMAKE_INSTALL_PREFIX}/include/sirius") From fecc50f22933283482a0f9f964e0e154a227cfca Mon 
Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 21 Feb 2019 15:00:15 +0100 Subject: [PATCH 08/28] ported two more kernels to ROCM --- src/SDDK/GPU/checksum.cu | 26 +++++----- src/SDDK/GPU/fft_kernels.cu | 97 ++++++++++++++++++------------------ src/SDDK/GPU/scale_matrix.cu | 34 ++++++------- 3 files changed, 78 insertions(+), 79 deletions(-) diff --git a/src/SDDK/GPU/checksum.cu b/src/SDDK/GPU/checksum.cu index 2b65d8ddf..76b34cc67 100644 --- a/src/SDDK/GPU/checksum.cu +++ b/src/SDDK/GPU/checksum.cu @@ -23,18 +23,19 @@ */ #include "cuda_common.hpp" -#include +#include +#include __global__ void double_complex_checksum_gpu_kernel ( - cuDoubleComplex const* ptr__, + hipDoubleComplex const* ptr__, size_t size__, - cuDoubleComplex *result__ + hipDoubleComplex *result__ ) { int N = num_blocks(size__, blockDim.x); - extern __shared__ char sdata_ptr[]; + HIP_DYNAMIC_SHARED( char, sdata_ptr) double* sdata_x = (double*)&sdata_ptr[0]; double* sdata_y = (double*)&sdata_ptr[blockDim.x * sizeof(double)]; @@ -58,27 +59,26 @@ __global__ void double_complex_checksum_gpu_kernel __syncthreads(); } - *result__ = make_cuDoubleComplex(sdata_x[0], sdata_y[0]); + *result__ = make_hipDoubleComplex(sdata_x[0], sdata_y[0]); } -extern "C" void double_complex_checksum_gpu(cuDoubleComplex const* ptr__, +extern "C" void double_complex_checksum_gpu(hipDoubleComplex const* ptr__, size_t size__, - cuDoubleComplex* result__) + hipDoubleComplex* result__) { dim3 grid_t(64); dim3 grid_b(1); - cuDoubleComplex* res; - cudaMalloc(&res, sizeof(cuDoubleComplex)); + hipDoubleComplex* res; + hipMalloc(&res, sizeof(hipDoubleComplex)); - double_complex_checksum_gpu_kernel <<>> - ( + hipLaunchKernelGGL((double_complex_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, ptr__, size__, res ); - cudaMemcpy(result__, res, sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost); + hipMemcpy(result__, res, sizeof(hipDoubleComplex), hipMemcpyDeviceToHost); - cudaFree(res); + hipFree(res); } 
diff --git a/src/SDDK/GPU/fft_kernels.cu b/src/SDDK/GPU/fft_kernels.cu index 7717912b2..3a32af584 100644 --- a/src/SDDK/GPU/fft_kernels.cu +++ b/src/SDDK/GPU/fft_kernels.cu @@ -24,7 +24,8 @@ #include "acc.hpp" #include -#include "hip/hip_runtime.h" +#include +#include #include "cuda_common.hpp" //NOTE: HIP will call the corresponding CUDA function if compiled with CUDA support @@ -35,8 +36,8 @@ __global__ void repack_z_buffer_gpu_kernel(int size_z, int num_zcol_loc, int const* local_z_offsets, int const* local_z_sizes, - double2* z_sticks_local, - double2* a2a_buffer) + hipDoubleComplex* z_sticks_local, + hipDoubleComplex* a2a_buffer) { int iz = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; int izcol = hipBlockIdx_y; @@ -61,8 +62,8 @@ extern "C" void repack_z_buffer_gpu(int direction, int zcol_max_size, int const* local_z_offsets, int const* local_z_sizes, - double2* z_sticks_local, - double2* a2a_buffer) + hipDoubleComplex* z_sticks_local, + hipDoubleComplex* a2a_buffer) { dim3 grid_t(64); dim3 grid_b(num_blocks(zcol_max_size, grid_t.x), num_zcol_loc, num_ranks); @@ -93,8 +94,8 @@ extern "C" void repack_z_buffer_gpu(int direction, __global__ void batch_load_gpu_kernel(int fft_size, int num_pw_components, int const* map, - double2 const* data, - double2* fft_buffer) + hipDoubleComplex const* data, + hipDoubleComplex* fft_buffer) { int i = hipBlockIdx_y; int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -108,8 +109,8 @@ extern "C" void batch_load_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - double2 const* data, - double2* fft_buffer, + hipDoubleComplex const* data, + hipDoubleComplex* fft_buffer, int stream_id__) { dim3 grid_t(64); @@ -117,7 +118,7 @@ extern "C" void batch_load_gpu(int fft_size, hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); - hipMemsetAsync(fft_buffer, 0, fft_size * num_fft * sizeof(double2), stream); + hipMemsetAsync(fft_buffer, 0, fft_size * num_fft * sizeof(hipDoubleComplex), 
stream); hipLaunchKernelGGL((batch_load_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, fft_size, @@ -131,8 +132,8 @@ extern "C" void batch_load_gpu(int fft_size, __global__ void batch_unload_gpu_kernel(int fft_size, int num_pw_components, int const* map, - double2 const* fft_buffer, - double2* data, + hipDoubleComplex const* fft_buffer, + hipDoubleComplex* data, double alpha, double beta) { @@ -140,9 +141,9 @@ __global__ void batch_unload_gpu_kernel(int fft_size, int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < num_pw_components) { - double2 z1 = data[array2D_offset(idx, i, num_pw_components)]; - double2 z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; - data[array2D_offset(idx, i, num_pw_components)] = double2{alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y}; + hipDoubleComplex z1 = data[array2D_offset(idx, i, num_pw_components)]; + hipDoubleComplex z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; + data[array2D_offset(idx, i, num_pw_components)] = make_hipDoubleComplex(alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y); //data[array2D_offset(idx, i, num_pw_components)] = cuCadd( // cuCmul(make_cuDoubleComplex(alpha, 0), data[array2D_offset(idx, i, num_pw_components)]), @@ -157,8 +158,8 @@ extern "C" void batch_unload_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - double2 const* fft_buffer, - double2* data, + hipDoubleComplex const* fft_buffer, + hipDoubleComplex* data, double alpha, double beta, int stream_id__) @@ -169,7 +170,7 @@ extern "C" void batch_unload_gpu(int fft_size, hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); if (alpha == 0) { - hipMemsetAsync(data, 0, num_pw_components * sizeof(double2), stream); + hipMemsetAsync(data, 0, num_pw_components * sizeof(hipDoubleComplex), stream); } hipLaunchKernelGGL((batch_unload_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, @@ -185,21 +186,21 @@ extern "C" void batch_unload_gpu(int fft_size, __global__ void 
load_x0y0_col_gpu_kernel(int z_col_size, int const* map, - double2 const* data, - double2* fft_buffer) + hipDoubleComplex const* data, + hipDoubleComplex* fft_buffer) { int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < z_col_size) { - fft_buffer[map[idx]] = double2{data[idx].x, -data[idx].y}; + fft_buffer[map[idx]] = make_hipDoubleComplex(data[idx].x, -data[idx].y); } } extern "C" void load_x0y0_col_gpu(int z_col_size, int const* map, - double2 const* data, - double2* fft_buffer, + hipDoubleComplex const* data, + hipDoubleComplex* fft_buffer, int stream_id__) { dim3 grid_t(64); @@ -216,8 +217,8 @@ extern "C" void load_x0y0_col_gpu(int z_col_size, } template -__global__ void pack_unpack_z_cols_gpu_kernel(double2* z_cols_packed__, - double2* fft_buf__, +__global__ void pack_unpack_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packed__, + hipDoubleComplex* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -245,8 +246,8 @@ __global__ void pack_unpack_z_cols_gpu_kernel(double2* z_cols_packed__, } } -extern "C" void unpack_z_cols_gpu(double2* z_cols_packed__, - double2* fft_buf__, +extern "C" void unpack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, + hipDoubleComplex* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -260,7 +261,7 @@ extern "C" void unpack_z_cols_gpu(double2* z_cols_packed__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(double2), stream); + hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(hipDoubleComplex), stream); hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed__, @@ -284,8 +285,8 @@ extern "C" void unpack_z_cols_gpu(double2* z_cols_packed__, } } -extern "C" void pack_z_cols_gpu(double2* z_cols_packed__, - double2* fft_buf__, +extern "C" void pack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, + hipDoubleComplex* fft_buf__, int size_x__, 
int size_y__, int size_z__, @@ -310,9 +311,9 @@ extern "C" void pack_z_cols_gpu(double2* z_cols_packed__, } template -__global__ void pack_unpack_two_z_cols_gpu_kernel(double2* z_cols_packed1__, - double2* z_cols_packed2__, - double2* fft_buf__, +__global__ void pack_unpack_two_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packed1__, + hipDoubleComplex* z_cols_packed2__, + hipDoubleComplex* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -326,32 +327,32 @@ __global__ void pack_unpack_two_z_cols_gpu_kernel(double2* z_cols_packed1__, /* load into buffer */ if (direction == 1) { int ipos = z_col_pos__[icol]; - double2 z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; - double2 z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; + hipDoubleComplex z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; + hipDoubleComplex z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; if (conjugate) { /* conj(z1) + I * conj(z2) */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = double2{z1.x + z2.y, z2.x - z1.y}; + fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_hipDoubleComplex(z1.x + z2.y, z2.x - z1.y); } else { /* z1 + I * z2 */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = double2{z1.x - z2.y, z1.y + z2.x}; + fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_hipDoubleComplex(z1.x - z2.y, z1.y + z2.x); } } if (direction == -1) { int ipos1 = z_col_pos__[icol]; int ipos2 = z_col_pos__[num_z_cols__ + icol]; - double2 z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; - double2 z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; + hipDoubleComplex z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; + hipDoubleComplex z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; - z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = double2{0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)}; - z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = double2{0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)}; + z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = 
make_hipDoubleComplex(0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)); + z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = make_hipDoubleComplex(0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)); } } } -extern "C" void unpack_z_cols_2_gpu(double2* z_cols_packed1__, - double2* z_cols_packed2__, - double2* fft_buf__, +extern "C" void unpack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, + hipDoubleComplex* z_cols_packed2__, + hipDoubleComplex* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -364,7 +365,7 @@ extern "C" void unpack_z_cols_2_gpu(double2* z_cols_packed1__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(double2), stream); + hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(hipDoubleComplex), stream); hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed1__, @@ -388,9 +389,9 @@ extern "C" void unpack_z_cols_2_gpu(double2* z_cols_packed1__, ); } -extern "C" void pack_z_cols_2_gpu(double2* z_cols_packed1__, - double2* z_cols_packed2__, - double2* fft_buf__, +extern "C" void pack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, + hipDoubleComplex* z_cols_packed2__, + hipDoubleComplex* fft_buf__, int size_x__, int size_y__, int size_z__, diff --git a/src/SDDK/GPU/scale_matrix.cu b/src/SDDK/GPU/scale_matrix.cu index 31ac26e7f..043c747f3 100644 --- a/src/SDDK/GPU/scale_matrix.cu +++ b/src/SDDK/GPU/scale_matrix.cu @@ -22,12 +22,13 @@ * \brief Contains implementaiton of CUDA kernels to scale matrix elements (rows or columns). 
*/ #include "cuda_common.hpp" -#include +#include +#include __global__ void scale_matrix_columns_gpu_kernel ( int nrow, - cuDoubleComplex* mtrx, + hipDoubleComplex* mtrx, double* a ) { @@ -36,21 +37,20 @@ __global__ void scale_matrix_columns_gpu_kernel if (irow < nrow) { mtrx[array2D_offset(irow, icol, nrow)] = - cuCmul(mtrx[array2D_offset(irow, icol, nrow)], make_cuDoubleComplex(a[icol], 0)); + hipCmul(mtrx[array2D_offset(irow, icol, nrow)], make_hipDoubleComplex(a[icol], 0)); } } // scale each column of the matrix by a column-dependent constant extern "C" void scale_matrix_columns_gpu(int nrow, int ncol, - cuDoubleComplex* mtrx, + hipDoubleComplex* mtrx, double* a) { dim3 grid_t(64); dim3 grid_b(num_blocks(nrow, grid_t.x), ncol); - scale_matrix_columns_gpu_kernel <<>> - ( + hipLaunchKernelGGL((scale_matrix_columns_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, nrow, mtrx, a @@ -60,29 +60,28 @@ extern "C" void scale_matrix_columns_gpu(int nrow, __global__ void scale_matrix_rows_gpu_kernel ( int nrow__, - cuDoubleComplex* mtrx__, + hipDoubleComplex* mtrx__, double const* v__ ) { int icol = blockIdx.y; int irow = blockDim.x * blockIdx.x + threadIdx.x; if (irow < nrow__) { - cuDoubleComplex z = mtrx__[array2D_offset(irow, icol, nrow__)]; - mtrx__[array2D_offset(irow, icol, nrow__)] = make_cuDoubleComplex(z.x * v__[irow], z.y * v__[irow]); + hipDoubleComplex z = mtrx__[array2D_offset(irow, icol, nrow__)]; + mtrx__[array2D_offset(irow, icol, nrow__)] = make_hipDoubleComplex(z.x * v__[irow], z.y * v__[irow]); } } // scale each row of the matrix by a row-dependent constant extern "C" void scale_matrix_rows_gpu(int nrow__, int ncol__, - cuDoubleComplex* mtrx__, + hipDoubleComplex* mtrx__, double const* v__) { dim3 grid_t(256); dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__); - scale_matrix_rows_gpu_kernel <<>> - ( + hipLaunchKernelGGL((scale_matrix_rows_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, nrow__, mtrx__, v__ @@ -91,7 +90,7 @@ extern "C" void 
scale_matrix_rows_gpu(int nrow__, __global__ void scale_matrix_elements_gpu_kernel ( - cuDoubleComplex* mtrx__, + hipDoubleComplex* mtrx__, int ld__, int nrow__, double beta__ @@ -100,12 +99,12 @@ __global__ void scale_matrix_elements_gpu_kernel int icol = blockIdx.y; int irow = blockDim.x * blockIdx.x + threadIdx.x; if (irow < nrow__) { - cuDoubleComplex z = mtrx__[array2D_offset(irow, icol, ld__)]; - mtrx__[array2D_offset(irow, icol, ld__)] = make_cuDoubleComplex(z.x * beta__, z.y * beta__); + hipDoubleComplex z = mtrx__[array2D_offset(irow, icol, ld__)]; + mtrx__[array2D_offset(irow, icol, ld__)] = make_hipDoubleComplex(z.x * beta__, z.y * beta__); } } -extern "C" void scale_matrix_elements_gpu(cuDoubleComplex* ptr__, +extern "C" void scale_matrix_elements_gpu(hipDoubleComplex* ptr__, int ld__, int nrow__, int ncol__, @@ -114,8 +113,7 @@ extern "C" void scale_matrix_elements_gpu(cuDoubleComplex* ptr__, dim3 grid_t(64); dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__); - scale_matrix_elements_gpu_kernel <<>> - ( + hipLaunchKernelGGL((scale_matrix_elements_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, ptr__, ld__, nrow__, From 2039fac726312b42ee7ed9998d4a85d2ccd04689 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 21 Feb 2019 15:06:06 +0100 Subject: [PATCH 09/28] fixed flags in CMake FindROCM module --- cmake/modules/FindROCM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake index df7efcd6e..70f3ea06d 100644 --- a/cmake/modules/FindROCM.cmake +++ b/cmake/modules/FindROCM.cmake @@ -361,7 +361,7 @@ macro(rocm_hip_add_library) # generate flags to use set(_ROCM_STD_FLAGS ${HIP_LIB_FLAGS} ${ROCM_HIPCC_FLAGS}) list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX ^-std=) - set(_ROCM_FLAGS) + set(_ROCM_FLAGS ${HIP_LIB_FLAGS}) if(CMAKE_CXX_STANDARD AND NOT _ROCM_STD_FLAGS) list(APPEND _ROCM_FLAGS -std=c++${CMAKE_CXX_STANDARD}) endif() From cbd499eab99724f6d82fccabfa1b3e2134501092 Mon Sep 17 
00:00:00 2001 From: Simon Frasch Date: Thu, 21 Feb 2019 15:22:18 +0100 Subject: [PATCH 10/28] Added hipblas integration (ROCM) --- CMakeLists.txt | 15 +- src/Band/diag_full_potential.hpp | 2 +- src/Band/diag_pseudo_potential.hpp | 8 +- src/Beta_projectors/beta_projectors_base.hpp | 4 +- src/CMakeLists.txt | 3 +- src/Hamiltonian/non_local_operator.hpp | 6 +- .../hubbard_generate_atomic_orbitals.hpp | 2 +- .../hubbard_occupancies_derivatives.hpp | 6 +- src/Hubbard/hubbard_occupancy.hpp | 2 +- src/Kernels/cuda_uspp_kernels.cu | 5 - src/Kernels/generate_dm_pw.cu | 10 +- src/SDDK/GPU/acc.hpp | 19 +- src/SDDK/GPU/gpublas_interface.hpp | 40 +++ src/SDDK/GPU/hipblas_interface.hpp | 305 ++++++++++++++++++ src/SDDK/linalg.hpp | 106 ++---- src/SDDK/linalg_base.hpp | 5 +- src/SDDK/wf_inner.hpp | 2 +- src/simulation_context.hpp | 6 +- src/sirius.h | 9 +- 19 files changed, 415 insertions(+), 140 deletions(-) create mode 100644 src/SDDK/GPU/gpublas_interface.hpp create mode 100644 src/SDDK/GPU/hipblas_interface.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cd10dd4e..cebc6449d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,9 +132,8 @@ if(USE_CUDA AND USE_ROCM) endif() if(USE_ROCM) - # TODO message(STATUS "WARNING: ROCM enabled, prototype feature! 
Only limited functionality available.") - find_package(ROCM COMPONENTS rocfft) + find_package(ROCM COMPONENTS rocfft hipblas) if(NOT ${ROCM_HIP_PLATFORM} STREQUAL hcc) message(FATAL_ERROR "Compilation on Nvidia platform not supported with ROCM enabled!") endif() @@ -142,18 +141,6 @@ if(USE_ROCM) add_definitions("-D__ROCM") add_definitions(${ROCM_DEFINITIONS}) include_directories(${ROCM_INCLUDE_DIRS}) - - # set compile flags to use with HIPCC compiler - set(HIPCC_COMPILE_FLAGS) - set(BUILD_TYPES DEBUG RELEASE RELWITHDEBINFO) - if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE_UPPER) - foreach(build_type IN LISTS BUILD_TYPES) - if("${build_type}" STREQUAL "${BUILD_TYPE_UPPER}") - list(APPEND HIPCC_COMPILE_FLAGS ${CMAKE_CXX_FLAGS_${BUILD_TYPE_UPPER}}) - endif() - endforeach() - endif() endif() # add required libraries diff --git a/src/Band/diag_full_potential.hpp b/src/Band/diag_full_potential.hpp index e22af6406..a92911948 100644 --- a/src/Band/diag_full_potential.hpp +++ b/src/Band/diag_full_potential.hpp @@ -699,7 +699,7 @@ inline void Band::diag_full_potential_second_variation(K_point& kp__, Hamiltonia linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } if (ctx_.num_mag_dims() != 3) { diff --git a/src/Band/diag_pseudo_potential.hpp b/src/Band/diag_pseudo_potential.hpp index f9bd43dcb..f2bde7246 100644 --- a/src/Band/diag_pseudo_potential.hpp +++ b/src/Band/diag_pseudo_potential.hpp @@ -22,14 +22,16 @@ * \brief Diagonalization of pseudopotential Hamiltonian. 
*/ +#include + #if defined(__GPU) && defined(__CUDA) extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, double r, - cuDoubleComplex* phi0, - cuDoubleComplex* phi1, - cuDoubleComplex* phi2); + hipDoubleComplex* phi0, + hipDoubleComplex* phi1, + hipDoubleComplex* phi2); #endif template diff --git a/src/Beta_projectors/beta_projectors_base.hpp b/src/Beta_projectors/beta_projectors_base.hpp index 0d0ecf895..4114c49ea 100644 --- a/src/Beta_projectors/beta_projectors_base.hpp +++ b/src/Beta_projectors/beta_projectors_base.hpp @@ -425,7 +425,7 @@ inline void Beta_projectors_base::local_inner_aux(double_complex auto pp = utils::get_env("SIRIUS_PRINT_PERFORMANCE"); if (pp && gkvec_.comm().rank() == 0) { #ifdef __GPU - if (ctx_.blas_linalg_t() == linalg_t::cublas) { + if (ctx_.blas_linalg_t() == linalg_t::gpublas) { acc::sync_stream(stream_id(-1)); } #endif @@ -455,7 +455,7 @@ inline void Beta_projectors_base::local_inner_aux(double* beta_pw_coeffs linalg_t la{linalg_t::none}; /* both wave-functions and beta-projectors are on GPU */ if (is_device_memory(ctx_.preferred_memory_t())) { - la = linalg_t::cublas; + la = linalg_t::gpublas; } else { /* wave-functions are on CPU but the beta-projectors are in the memory of main device */ la = linalg_t::blas; switch (ctx_.processing_unit()) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index da70a36a3..5b79c06d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -36,7 +36,8 @@ if(USE_ROCM) # create gpu library compiled with hip file(GLOB_RECURSE HIPFILES_KERNELS "Kernels/*.cu") - rocm_hip_add_library(sirius_rocm SHARED ./SDDK/GPU/fft_kernels.cu ${HIPFILES_KERNELS} + file(GLOB_RECURSE HIPFILES_SDDK "SDDK/*.cu") + rocm_hip_add_library(sirius_rocm SHARED ${HIPFILES_SDDK} ${HIPFILES_KERNELS} FLAGS ${DEFINITIONS_GENERATOR} ${INCLUDE_DIR_GENERATOR} "-Wno-macro-redefined") endif() diff --git a/src/Hamiltonian/non_local_operator.hpp b/src/Hamiltonian/non_local_operator.hpp index 
2ef17110c..f228deac8 100644 --- a/src/Hamiltonian/non_local_operator.hpp +++ b/src/Hamiltonian/non_local_operator.hpp @@ -138,7 +138,7 @@ inline void Non_local_operator::apply(int chunk__, int ispn_bloc } case device_t::GPU: { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; break; } } @@ -223,7 +223,7 @@ inline void Non_local_operator::apply(int chunk__, int ia__, int } case device_t::GPU: { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; break; } } @@ -289,7 +289,7 @@ inline void Non_local_operator::apply(int chunk__, int ispn_block__, Wav } case device_t::GPU: { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; break; } } diff --git a/src/Hubbard/hubbard_generate_atomic_orbitals.hpp b/src/Hubbard/hubbard_generate_atomic_orbitals.hpp index 3a7301e88..809ccdc05 100644 --- a/src/Hubbard/hubbard_generate_atomic_orbitals.hpp +++ b/src/Hubbard/hubbard_generate_atomic_orbitals.hpp @@ -132,7 +132,7 @@ void Hubbard::orthogonalize_atomic_orbitals(K_point& kp, Wave_functions& sphi) linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } if (ctx_.num_mag_dims() == 3) { diff --git a/src/Hubbard/hubbard_occupancies_derivatives.hpp b/src/Hubbard/hubbard_occupancies_derivatives.hpp index 2515b7e15..3cc705b0a 100644 --- a/src/Hubbard/hubbard_occupancies_derivatives.hpp +++ b/src/Hubbard/hubbard_occupancies_derivatives.hpp @@ -112,7 +112,7 @@ void Hubbard::compute_occupancies_derivatives(K_point& kp, linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } /* compute */ @@ -284,7 +284,7 @@ void Hubbard::compute_occupancies_stress_derivatives(K_point& linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } /* compute */ 
@@ -476,7 +476,7 @@ void Hubbard::compute_occupancies(K_point& kp, linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) { diff --git a/src/Hubbard/hubbard_occupancy.hpp b/src/Hubbard/hubbard_occupancy.hpp index 38ff03879..24723e8a1 100644 --- a/src/Hubbard/hubbard_occupancy.hpp +++ b/src/Hubbard/hubbard_occupancy.hpp @@ -79,7 +79,7 @@ void Hubbard::hubbard_compute_occupation_numbers(K_point_set& kset_) linalg_t la{linalg_t::blas}; if (ctx_.processing_unit() == device_t::GPU) { mem = memory_t::device; - la = linalg_t::cublas; + la = linalg_t::gpublas; } for (int ikloc = 0; ikloc < kset_.spl_num_kpoints().local_size(); ikloc++) { diff --git a/src/Kernels/cuda_uspp_kernels.cu b/src/Kernels/cuda_uspp_kernels.cu index 3e38d2758..dff012510 100644 --- a/src/Kernels/cuda_uspp_kernels.cu +++ b/src/Kernels/cuda_uspp_kernels.cu @@ -27,11 +27,6 @@ #include "hip/hip_complex.h" extern hipStream_t* streams; -extern "C" void* cuda_malloc(size_t size); -extern "C" void cuda_free(void* ptr); -extern "C" void cublas_zgemm(int transa, int transb, int32_t m, int32_t n, int32_t k, - hipDoubleComplex* alpha, hipDoubleComplex* a, int32_t lda, hipDoubleComplex* b, - int32_t ldb, hipDoubleComplex* beta, hipDoubleComplex* c, int32_t ldc, int stream_id); __global__ void compute_chebyshev_order1_gpu_kernel ( diff --git a/src/Kernels/generate_dm_pw.cu b/src/Kernels/generate_dm_pw.cu index a070b68d1..948569637 100644 --- a/src/Kernels/generate_dm_pw.cu +++ b/src/Kernels/generate_dm_pw.cu @@ -27,8 +27,8 @@ #include "hip/hip_runtime.h" #include "hip/hip_complex.h" -#ifdef __CUDA -#include "../SDDK/GPU/cublas.hpp" +#ifdef __GPU +#include "../SDDK/GPU/gpublas_interface.hpp" #endif __global__ void generate_phase_factors_conj_gpu_kernel @@ -85,17 +85,13 @@ extern "C" void generate_dm_pw_gpu(int num_atoms__, double alpha = 1; double beta = 0; -#ifdef 
__CUDA - cublas::dgemm('N', 'T', nbf__ * (nbf__ + 1) / 2, num_gvec_loc__ * 2, num_atoms__, + gpublas::dgemm('N', 'T', nbf__ * (nbf__ + 1) / 2, num_gvec_loc__ * 2, num_atoms__, &alpha, dm__, nbf__ * (nbf__ + 1) / 2, phase_factors__, num_gvec_loc__ * 2, &beta, dm_pw__, nbf__ * (nbf__ + 1) / 2, stream_id__); -#else - throw std::runtime_error("not implemented for non-CUDA."); -#endif acc::sync_stream(stream_id(stream_id__)); } diff --git a/src/SDDK/GPU/acc.hpp b/src/SDDK/GPU/acc.hpp index 028ea2559..891bca52b 100644 --- a/src/SDDK/GPU/acc.hpp +++ b/src/SDDK/GPU/acc.hpp @@ -471,7 +471,7 @@ inline bool check_device_ptr(void const* ptr__) } // namespace acc -#if defined(__CUDA) +#if defined(__GPU) extern "C" void scale_matrix_columns_gpu(int nrow, int ncol, void* mtrx, double* a); extern "C" void scale_matrix_rows_gpu(int nrow, int ncol, void* mtrx, double const* v); @@ -481,24 +481,7 @@ extern "C" void scale_matrix_elements_gpu(std::complex* ptr__, int nrow__, int ncol__, double beta__); -#elif defined(__ROCM) -inline void scale_matrix_columns_gpu(int nrow, int ncol, void* mtrx, double* a) { - throw std::runtime_error("Not implemented for ROCM!"); -} - -inline void scale_matrix_rows_gpu(int nrow, int ncol, void* mtrx, double const* v) { - throw std::runtime_error("Not implemented for ROCM!"); -} - -inline void scale_matrix_elements_gpu(std::complex* ptr__, - int ld__, - int nrow__, - int ncol__, - double beta__) { - throw std::runtime_error("Not implemented for ROCM!"); -} #endif - #endif // __ACC_HPP__ diff --git a/src/SDDK/GPU/gpublas_interface.hpp b/src/SDDK/GPU/gpublas_interface.hpp new file mode 100644 index 000000000..fb2dbaf3d --- /dev/null +++ b/src/SDDK/GPU/gpublas_interface.hpp @@ -0,0 +1,40 @@ +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file gpublas_interface.hpp + * + * \brief Interface to GPU blas libraries + */ + +#ifndef __GPUBLAS_INTERFACE_HPP__ +#define __GPUBLAS_INTERFACE_HPP__ + +#include + +#if defined(__GPU) && defined(__CUDA) +#include "cublas.hpp" +namespace gpublas = cublas; + +#elif defined(__GPU) && defined(__ROCM) +#include "hipblas_interface.hpp" +namespace gpublas = hipblas; + +#endif + +#endif diff --git a/src/SDDK/GPU/hipblas_interface.hpp b/src/SDDK/GPU/hipblas_interface.hpp new file mode 100644 index 000000000..ed7ab6a21 --- /dev/null +++ b/src/SDDK/GPU/hipblas_interface.hpp @@ -0,0 +1,305 @@ +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1.
Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file hipblas_interface.hpp + * + * \brief Interface to hipblas related functions.
+ */ + +#ifndef __HIP_BLAS_INTERFACE_HPP__ +#define __HIP_BLAS_INTERFACE_HPP__ + +#include +#include +#include +#include +#include "acc.hpp" + +namespace hipblas { + +inline void error_message(hipblasStatus_t status) +{ + switch (status) { + case HIPBLAS_STATUS_NOT_INITIALIZED: { + printf("the library was not initialized\n"); + break; + } + case HIPBLAS_STATUS_INVALID_VALUE: { + printf("the parameters m,n,k<0\n"); + break; + } + case HIPBLAS_STATUS_ARCH_MISMATCH: { + printf("the device does not support double-precision\n"); + break; + } + case HIPBLAS_STATUS_EXECUTION_FAILED: { + printf("the function failed to launch on the GPU\n"); + break; + } + default: { + printf("hipblas status unknown"); + } + } +} + +inline hipblasOperation_t get_hipblasOperation_t(char c) +{ + switch (c) { + case 'n': + case 'N': { + return HIPBLAS_OP_N; + } + case 't': + case 'T': { + return HIPBLAS_OP_T; + } + case 'c': + case 'C': { + return HIPBLAS_OP_C; + } + default: { + throw std::runtime_error("get_hipblasOperation_t(): wrong operation"); + } + } + return HIPBLAS_OP_N; // make compiler happy +} + +inline hipblasSideMode_t get_hipblasSideMode_t(char c) +{ + switch (c) { + case 'l': + case 'L': { + return HIPBLAS_SIDE_LEFT; + } + case 'r': + case 'R': { + return HIPBLAS_SIDE_RIGHT; + } + default: { + throw std::runtime_error("get_hipblasSideMode_t(): wrong side"); + } + } + return HIPBLAS_SIDE_LEFT; //make compiler happy +} + +inline hipblasFillMode_t get_hipblasFillMode_t(char c) +{ + switch (c) { + case 'u': + case 'U': { + return HIPBLAS_FILL_MODE_UPPER; + } + case 'l': + case 'L': { + return HIPBLAS_FILL_MODE_LOWER; + } + default: { + throw std::runtime_error("get_hipblasFillMode_t(): wrong mode"); + } + } + return HIPBLAS_FILL_MODE_UPPER; // make compiler happy +} + +inline hipblasDiagType_t get_hipblasDiagType_t(char c) +{ + switch (c) { + case 'n': + case 'N': { + return HIPBLAS_DIAG_NON_UNIT; + } + case 'u': + case 'U': { + return HIPBLAS_DIAG_UNIT; + } + default: { + throw 
std::runtime_error("get_hipblasDiagType_t(): wrong diagonal type"); + } + } + return HIPBLAS_DIAG_NON_UNIT; // make compiler happy +} + +#ifdef NDEBUG +#define CALL_HIPBLAS(func__, args__) \ +{ \ + hipblasStatus_t status; \ + if ((status = func__ args__) != HIPBLAS_STATUS_SUCCESS) { \ + error_message(status); \ + char nm[1024]; \ + gethostname(nm, 1024); \ + printf("hostname: %s\n", nm); \ + printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ + stack_backtrace(); \ + } \ +} +#else +#define CALL_HIPBLAS(func__, args__) \ +{ \ + hipblasStatus_t status; \ + func__ args__; \ + hipDeviceSynchronize(); \ + status = hipblasGetError(); \ + if (status != HIPBLAS_STATUS_SUCCESS) { \ + error_message(status); \ + char nm[1024]; \ + gethostname(nm, 1024); \ + printf("hostname: %s\n", nm); \ + printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ + stack_backtrace(); \ + } \ +} +#endif + +/// Store the default (null) stream handler. +inline hipblasHandle_t& null_stream_handle() +{ + static hipblasHandle_t null_stream_handle_; + return null_stream_handle_; +} + +/// Store the hipblas handlers associated with hip streams. +inline std::vector& stream_handles() +{ + static std::vector stream_handles_; + return stream_handles_; +} + +inline void create_stream_handles() +{ + CALL_HIPBLAS(hipblasCreate, (&null_stream_handle())); + + stream_handles() = std::vector(acc::num_streams()); + for (int i = 0; i < acc::num_streams(); i++) { + CALL_HIPBLAS(hipblasCreate, (&stream_handles()[i])); + + CALL_HIPBLAS(hipblasSetStream, (stream_handles()[i], acc::stream(stream_id(i)))); + } +} + +inline void destroy_stream_handles() +{ + CALL_HIPBLAS(hipblasDestroy, (null_stream_handle())); + for (int i = 0; i < acc::num_streams(); i++) { + CALL_HIPBLAS(hipblasDestroy, (stream_handles()[i])); + } +} + +inline hipblasHandle_t stream_handle(int id__) +{ + return (id__ == -1) ? 
null_stream_handle() : stream_handles()[id__]; +} + +inline void zgemv(char transa, int32_t m, int32_t n, hipDoubleComplex* alpha, hipDoubleComplex* a, int32_t lda, + hipDoubleComplex* x, int32_t incx, hipDoubleComplex* beta, hipDoubleComplex* y, int32_t incy, int stream_id) +{ + // CALL_HIPBLAS(hipblasZgemv, (stream_handle(stream_id), get_hipblasOperation_t(transa), m, n, alpha, a, lda, x, incx, beta, y, incy)); + throw std::runtime_error("zgemv not implemented in hipblas with ROCM!"); +} + +inline void zgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, + hipDoubleComplex const* alpha, hipDoubleComplex const* a, int32_t lda, hipDoubleComplex const* b, + int32_t ldb, hipDoubleComplex const* beta, hipDoubleComplex* c, int32_t ldc, int stream_id) +{ + // CALL_HIPBLAS(hipblasZgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), get_hipblasOperation_t(transb), + // m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + throw std::runtime_error("zgemm not implemented in hipblas with ROCM!"); +} + +inline void dgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, + double const* alpha, double const* a, int32_t lda, double const* b, + int32_t ldb, double const* beta, double* c, int32_t ldc, int stream_id) +{ + CALL_HIPBLAS(hipblasDgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), get_hipblasOperation_t(transb), + m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); +} + +inline void dtrmm(char side__, char uplo__, char transa__, char diag__, int m__, int n__, + double const* alpha__, double const* A__, int lda__, double* B__, int ldb__) +{ + throw std::runtime_error("dtrmm not implemented in hipblas with ROCM!"); + // hipblasSideMode_t side = get_hipblasSideMode_t(side__); + // hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); + // hipblasOperation_t transa = get_hipblasOperation_t(transa__); + // hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); + // CALL_HIPBLAS(hipblasDtrmm, (null_stream_handle(), side, uplo, 
transa, diag, m__, n__, alpha__, A__, lda__, B__, ldb__, B__, ldb__)); +} + +inline void ztrmm(char side__, + char uplo__, + char transa__, + char diag__, + int m__, + int n__, + hipDoubleComplex const* alpha__, + hipDoubleComplex const* A__, + int lda__, + hipDoubleComplex* B__, + int ldb__) +{ + throw std::runtime_error("ztrmm not implemented in hipblas with ROCM!"); + // hipblasSideMode_t side = get_hipblasSideMode_t(side__); + // hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); + // hipblasOperation_t transa = get_hipblasOperation_t(transa__); + // hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); + // CALL_HIPBLAS(hipblasZtrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, B__, ldb__, B__, ldb__)); +} + +inline void dger(int m, + int n, + double const* alpha, + double const* x, + int incx, + double const* y, + int incy, + double* A, + int lda, + int stream_id) +{ + CALL_HIPBLAS(hipblasDger, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); +} + +inline void zgeru(int m, + int n, + hipDoubleComplex const* alpha, + hipDoubleComplex const* x, + int incx, + hipDoubleComplex const* y, + int incy, + hipDoubleComplex* A, + int lda, + int stream_id) +{ + throw std::runtime_error("zgeru not implemented in hipblas with ROCM!"); + // CALL_HIPBLAS(hipblasZgeru, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); +} + +inline void zaxpy(int n__, + hipDoubleComplex const* alpha__, + hipDoubleComplex const* x__, + int incx__, + hipDoubleComplex* y__, + int incy__) +{ + throw std::runtime_error("zaxpy not implemented in hipblas with ROCM!"); + // CALL_HIPBLAS(hipblasZaxpy, (null_stream_handle(), n__, alpha__, x__, incx__, y__, incy__)); +} + + +} // namespace hipblas + +#endif diff --git a/src/SDDK/linalg.hpp b/src/SDDK/linalg.hpp index 5379ca7b5..506aaa1d0 100644 --- a/src/SDDK/linalg.hpp +++ b/src/SDDK/linalg.hpp @@ -26,8 +26,8 @@ #define __LINALG_HPP__ #include -#ifdef __CUDA -#include 
"GPU/cublas.hpp" +#ifdef __GPU +#include "GPU/gpublas_interface.hpp" #endif #ifdef __MAGMA #include "GPU/magma.hpp" @@ -86,17 +86,13 @@ inline void linalg2::gemm(char transa, char transb, ftn_int m, ftn_i const_cast(B), &ldb, const_cast(beta), C, &ldc, (ftn_len)1, (ftn_len)1); break; } - case linalg_t::cublas: { -#if defined(__GPU) && defined(__CUDA) - cublas::dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, sid()); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + case linalg_t::gpublas: { + gpublas::dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, sid()); break; } case linalg_t::cublasxt: { #if defined(__GPU) && defined(__CUDA) - cublas::xt::dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + gpublas::xt::dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -129,25 +125,21 @@ inline void linalg2::gemm(char transa, char transb, ftn_int const_cast(beta), C, &ldc, (ftn_len)1, (ftn_len)1); break; } - case linalg_t::cublas: { -#if defined(__GPU) && defined(__CUDA) - cublas::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), - ldb, reinterpret_cast(beta), - reinterpret_cast(C), ldc, sid()); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + case linalg_t::gpublas: { + gpublas::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), + ldb, reinterpret_cast(beta), + reinterpret_cast(C), ldc, sid()); break; } case linalg_t::cublasxt: { #if defined(__GPU) && defined(__CUDA) - cublas::xt::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(beta), - reinterpret_cast(C), ldc); + gpublas::xt::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), + reinterpret_cast(A), lda, + reinterpret_cast(B), ldb, + 
reinterpret_cast(beta), + reinterpret_cast(C), ldc); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -171,12 +163,8 @@ inline void linalg2::ger(ftn_int m, ftn_int n, ftn_double const* alp const_cast(y), &incy, A, &lda); break; } - case linalg_t::cublas: { -#if defined(__GPU) && defined(__CUDA) - cublas::dger(m, n, alpha, x, incx, y, incy, A, lda, sid()); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + case linalg_t::gpublas: { + gpublas::dger(m, n, alpha, x, incx, y, incy, A, lda, sid()); break; } case linalg_t::cublasxt: { @@ -200,17 +188,13 @@ inline void linalg2::trmm(char side, char uplo, char transa, ftn_int const_cast(A), &lda, B, &ldb, (ftn_len)1, (ftn_len)1, (ftn_len)1, (ftn_len)1); break; } - case linalg_t::cublas: { -#if defined(__GPU) && defined(__CUDA) - cublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + case linalg_t::gpublas: { + gpublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); break; } case linalg_t::cublasxt: { #if defined(__GPU) && defined(__CUDA) - cublas::xt::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); + gpublas::xt::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -234,19 +218,15 @@ inline void linalg2::trmm(char side, char uplo, char transa, const_cast(A), &lda, B, &ldb, (ftn_len)1, (ftn_len)1, (ftn_len)1, (ftn_len)1); break; } - case linalg_t::cublas: { -#if defined(__GPU) && defined(__CUDA) - cublas::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), ldb); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + case linalg_t::gpublas: { + gpublas::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), ldb); break; } case linalg_t::cublasxt: { #if 
defined(__GPU) && defined(__CUDA) - cublas::xt::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), ldb); + gpublas::xt::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), ldb); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -1183,12 +1163,8 @@ inline void linalg::gemv(int trans__, ftn_int m, ftn_in ftn_double_complex* beta, ftn_double_complex* y, ftn_int incy, int stream_id) { -#if defined(__GPU) && defined(__CUDA) const char trans[] = {'N', 'T', 'C'}; - cublas::zgemv(trans[trans__], m, n, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)x, incx, (cuDoubleComplex*)beta, (cuDoubleComplex*)y, incy, stream_id); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::zgemv(trans[trans__], m, n, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stream_id); } // Generic interface to zgemm @@ -1204,12 +1180,8 @@ inline void linalg::gemm(int transa__, int transb__, ft assert(m > 0); assert(n > 0); assert(k > 0); -#if defined(__GPU) && defined(__CUDA) const char trans[] = {'N', 'T', 'C'}; - cublas::zgemm(trans[transa__], trans[transb__], m, n, k, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)B, ldb, (cuDoubleComplex*)beta, (cuDoubleComplex*)C, ldc, stream_id); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::zgemm(trans[transa__], trans[transb__], m, n, k, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, stream_id); } // Generic interface to dgemm @@ -1225,12 +1197,8 @@ inline void linalg::gemm(int transa__, int transb__, ftn_int m, assert(m > 0); assert(n > 0); assert(k > 0); -#if defined(__GPU) && defined(__CUDA) const char trans[] = {'N', 'T', 'C'}; - 
cublas::dgemm(trans[transa__], trans[transb__], m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream_id); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::dgemm(trans[transa__], trans[transb__], m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream_id); } template <> @@ -1273,11 +1241,7 @@ inline void linalg::trmm(char side, ftn_double* B, ftn_int ldb) { -#if defined(__GPU) && defined(__CUDA) - cublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); } template <> @@ -1292,11 +1256,7 @@ inline void linalg::trmm(char side, ftn_double_complex* B, ftn_int ldb) { -#if defined(__GPU) && defined(__CUDA) - cublas::ztrmm(side, uplo, transa, 'N', m, n, (cuDoubleComplex*)alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)B, ldb); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::ztrmm(side, uplo, transa, 'N', m, n, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb); } template <> @@ -1307,11 +1267,7 @@ inline void linalg::axpy(ftn_int n__, ftn_double_complex* y__, ftn_int incy__) { -#if defined(__GPU) && defined(__CUDA) - cublas::zaxpy(n__, (cuDoubleComplex const*)alpha__, (cuDoubleComplex*)x__, incx__, (cuDoubleComplex*)y__, incy__); -#else - throw std::runtime_error("not compiled with cublas"); -#endif + gpublas::zaxpy(n__, (hipDoubleComplex const*)alpha__, (hipDoubleComplex*)x__, incx__, (hipDoubleComplex*)y__, incy__); } #endif // __GPU diff --git a/src/SDDK/linalg_base.hpp b/src/SDDK/linalg_base.hpp index 9eac4ef64..fdd88d6df 100644 --- a/src/SDDK/linalg_base.hpp +++ b/src/SDDK/linalg_base.hpp @@ -64,7 +64,7 @@ enum class linalg_t blas, lapack, scalapack, - cublas, + gpublas, cublasxt, magma }; @@ -77,7 +77,8 @@ inline linalg_t get_linalg_t(std::string name__) {"blas", linalg_t::blas}, {"lapack", linalg_t::lapack}, 
{"scalapack", linalg_t::scalapack}, - {"cublas", linalg_t::cublas}, + {"cublas", linalg_t::gpublas}, + {"gpublas", linalg_t::gpublas}, {"cublasxt", linalg_t::cublasxt}, {"magma", linalg_t::magma}, }; diff --git a/src/SDDK/wf_inner.hpp b/src/SDDK/wf_inner.hpp index 09421a298..dd51a0b1c 100644 --- a/src/SDDK/wf_inner.hpp +++ b/src/SDDK/wf_inner.hpp @@ -49,7 +49,7 @@ void inner_local(memory_t mem__, linalg_t la__, int ispn__, Wave_functio beta__, buf__, ld__, sid__); /* subtract one extra G=0 contribution */ if (comm.rank() == 0) { - linalg_t la = is_host_memory(mem__) ? linalg_t::blas : linalg_t::cublas; + linalg_t la = is_host_memory(mem__) ? linalg_t::blas : linalg_t::gpublas; linalg2(la).ger(m__, n__, &linalg_const::m_one(), reinterpret_cast(bra__.pw_coeffs(s).prime().at(bra__.preferred_memory_t(), 0, i0__)), 2 * bra__.pw_coeffs(s).prime().ld(), diff --git a/src/simulation_context.hpp b/src/simulation_context.hpp index d08558f2b..e2b446911 100644 --- a/src/simulation_context.hpp +++ b/src/simulation_context.hpp @@ -1251,10 +1251,14 @@ inline void Simulation_context::initialize() } case device_t::GPU: { if (control_input_.memory_usage_ == "high") { - blas_linalg_t_ = linalg_t::cublas; + blas_linalg_t_ = linalg_t::gpublas; } if (control_input_.memory_usage_ == "low" || control_input_.memory_usage_ == "medium") { +#ifdef __ROCM + blas_linalg_t_ = linalg_t::gpublas; +#else blas_linalg_t_ = linalg_t::cublasxt; +#endif } break; } diff --git a/src/sirius.h b/src/sirius.h index 0edff6699..836c1bb5b 100644 --- a/src/sirius.h +++ b/src/sirius.h @@ -86,8 +86,10 @@ inline void initialize(bool call_mpi_init__ = true) acc::set_device_id(devid); } acc::create_streams(omp_get_max_threads() + 1); +#if defined(__GPU) + gpublas::create_stream_handles(); +#endif #if defined(__CUDA) - cublas::create_stream_handles(); cublas::xt::create_handle(); cusolver::create_handle(); #endif @@ -127,11 +129,14 @@ inline void finalize(bool call_mpi_fin__ = true, bool reset_device__ = true, boo if 
(acc::num_devices()) { //acc::set_device(); +#if defined(__GPU) + gpublas::destroy_stream_handles(); +#endif #if defined(__CUDA) cusolver::destroy_handle(); cublas::xt::destroy_handle(); - cublas::destroy_stream_handles(); #endif + acc::destroy_streams(); if (reset_device__) { acc::reset(); From dedf872d3cdb0b3d8240a3e37a921520f9abe2b6 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 28 Feb 2019 17:07:32 +0100 Subject: [PATCH 11/28] small fixes for non-ROCM compilation --- apps/unit_tests/test_wf_ortho.cpp | 2 +- src/Band/diag_pseudo_potential.hpp | 2 +- src/SDDK/fft3d.hpp | 55 ++++++++++++++---------------- src/SDDK/linalg.hpp | 20 +++++++++++ 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/apps/unit_tests/test_wf_ortho.cpp b/apps/unit_tests/test_wf_ortho.cpp index 5542c646b..cb62cfcc4 100644 --- a/apps/unit_tests/test_wf_ortho.cpp +++ b/apps/unit_tests/test_wf_ortho.cpp @@ -37,7 +37,7 @@ void test_wf_ortho(std::vector mpi_grid_dims__, linalg_t la{linalg_t::blas}; memory_t mem{memory_t::host}; if (pu == device_t::GPU) { - la = linalg_t::cublas; + la = linalg_t::gpublas; mem = memory_t::device; } diff --git a/src/Band/diag_pseudo_potential.hpp b/src/Band/diag_pseudo_potential.hpp index f2bde7246..a00ac9972 100644 --- a/src/Band/diag_pseudo_potential.hpp +++ b/src/Band/diag_pseudo_potential.hpp @@ -22,9 +22,9 @@ * \brief Diagonalization of pseudopotential Hamiltonian. 
*/ -#include #if defined(__GPU) && defined(__CUDA) +#include extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, diff --git a/src/SDDK/fft3d.hpp b/src/SDDK/fft3d.hpp index ca8f99945..bba8d09c1 100644 --- a/src/SDDK/fft3d.hpp +++ b/src/SDDK/fft3d.hpp @@ -33,11 +33,12 @@ #if defined(__GPU) && defined(__CUDA) #include "GPU/cufft.hpp" #include "GPU/fft_kernels.hpp" -#define GPUFFT cufft +namespace gpufft = cufft; + #elif defined(__GPU) && defined(__ROCM) #include "GPU/rocfft_interface.hpp" #include "GPU/fft_kernels.hpp" -#define GPUFFT rocfft +namespace gpufft = rocfft; #endif namespace sddk { @@ -197,11 +198,11 @@ class FFT3D : public FFT3D_grid case device_t::GPU: { #if defined(__GPU) if (*acc_fft_plan__) { - GPUFFT::destroy_plan_handle(*acc_fft_plan__); + gpufft::destroy_plan_handle(*acc_fft_plan__); } int dim_z[] = {size(2)}; - *acc_fft_plan__ = GPUFFT::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); - GPUFFT::set_stream(*acc_fft_plan__, stream_id(acc_fft_stream_id_)); + *acc_fft_plan__ = gpufft::create_batch_plan(1, dim_z, dim_z, 1, size(2), zcol_count_max__, false); + gpufft::set_stream(*acc_fft_plan__, stream_id(acc_fft_stream_id_)); #endif break; } @@ -271,7 +272,7 @@ class FFT3D : public FFT3D_grid fft_buffer_aux__.at(memory_t::device), acc_fft_stream_id_); } /* transform all columns */ - GPUFFT::backward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); + gpufft::backward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); /* repack from fft_buffer_aux to fft_buffer */ repack_z_buffer_gpu(direction, comm_.size(), size(2), num_zcol_local, max_zloc_size_, @@ -297,7 +298,7 @@ class FFT3D : public FFT3D_grid fft_buffer_.at(memory_t::device)); /* transform all columns */ - GPUFFT::forward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); + gpufft::forward_transform(acc_fft_plan_z__, fft_buffer_aux__.at(memory_t::device)); /* get all columns from FFT buffer */ 
batch_unload_gpu(gvec_partition_->zcol_count_fft() * size(2), gvec_partition_->gvec_count_fft(), 1, map_gvec_to_fft_buffer_.at(memory_t::device), @@ -499,12 +500,12 @@ class FFT3D : public FFT3D_grid gvec_partition_->gvec().num_zcol(), z_col_pos_.at(memory_t::device), is_reduced, acc_fft_stream_id_); /* stream #0 executes FFT */ - GPUFFT::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); + gpufft::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); break; } case -1: { /* stream #0 executes FFT */ - GPUFFT::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); + gpufft::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); /* stream #0 packs z-columns */ pack_z_cols_gpu(fft_buffer_aux__.at(memory_t::device), fft_buffer_.at(memory_t::device), size(0), size(1), local_size_z(), @@ -591,12 +592,12 @@ class FFT3D : public FFT3D_grid fft_buffer_.at(memory_t::device), size(0), size(1), local_size_z(), gvec_partition_->gvec().num_zcol(), z_col_pos_.at(memory_t::device), acc_fft_stream_id_); /* stream #0 executes FFT */ - GPUFFT::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); + gpufft::backward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); break; } case -1: { /* stream #0 executes FFT */ - GPUFFT::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); + gpufft::forward_transform(acc_fft_plan_xy_, fft_buffer_.at(memory_t::device)); /* stream #0 packs z-columns */ pack_z_cols_2_gpu(fft_buffer_aux1__.at(memory_t::device), fft_buffer_aux2__.at(memory_t::device), @@ -739,12 +740,12 @@ class FFT3D : public FFT3D_grid bool auto_alloc{false}; int dim_xy[] = {size(1), size(0)}; /* create plan for xy transform */ - acc_fft_plan_xy_ = GPUFFT::create_batch_plan(2, dim_xy, dim_xy, 1, size(0) * size(1), local_size_z(), + acc_fft_plan_xy_ = gpufft::create_batch_plan(2, dim_xy, dim_xy, 1, size(0) * size(1), local_size_z(), auto_alloc); /* in CUDA case this is 
an alias */ acc_fft_plan_xy_ = acc_fft_plan_xy_; /* stream #0 will execute FFTs */ - GPUFFT::set_stream(acc_fft_plan_xy_, stream_id(acc_fft_stream_id_)); + gpufft::set_stream(acc_fft_plan_xy_, stream_id(acc_fft_stream_id_)); /* allocate arrays with z- offsets and sizes on the host and device*/ z_offsets_ = mdarray(comm_.size()); z_sizes_ = mdarray(comm_.size()); @@ -783,12 +784,12 @@ class FFT3D : public FFT3D_grid } #if defined(__GPU) if (pu_ == device_t::GPU) { - GPUFFT::destroy_plan_handle(acc_fft_plan_xy_); + gpufft::destroy_plan_handle(acc_fft_plan_xy_); if (acc_fft_plan_z_gvec_) { - GPUFFT::destroy_plan_handle(acc_fft_plan_z_gvec_); + gpufft::destroy_plan_handle(acc_fft_plan_z_gvec_); } if (acc_fft_plan_z_gkvec_) { - GPUFFT::destroy_plan_handle(acc_fft_plan_z_gkvec_); + gpufft::destroy_plan_handle(acc_fft_plan_z_gkvec_); } #if defined(__ROCM) rocfft::finalize(); @@ -993,36 +994,32 @@ class FFT3D : public FFT3D_grid map_gvec_to_fft_buffer_x0y0_.allocate(memory_t::device).copy_to(memory_t::device); } #if defined(__GPU) +#if defined(__CUDA) int zcol_count_max{0}; if (gvp__.gvec().bare()) { zcol_count_max = zcol_gvec_count_max_; } else { zcol_count_max = zcol_gkvec_count_max_; } - size_t work_size; int dim_z[] = {size(2)}; int dims_xy[] = {size(1), size(0)}; -#endif - -#if defined(__GPU) /* maximum worksize of z and xy transforms */ -#if defined(__CUDA) - work_size = std::max(GPUFFT::get_work_size(2, dims_xy, local_size_z()), - GPUFFT::get_work_size(1, dim_z, zcol_count_max)); + size_t work_size = std::max(gpufft::get_work_size(2, dims_xy, local_size_z()), + gpufft::get_work_size(1, dim_z, zcol_count_max)); #elif defined(__ROCM) - work_size = std::max(GPUFFT::get_work_size(acc_fft_plan_xy_), - GPUFFT::get_work_size(acc_fft_plan_z_gvec_)); + size_t work_size = std::max(gpufft::get_work_size(acc_fft_plan_xy_), + gpufft::get_work_size(acc_fft_plan_z_gvec_)); #endif /* allocate accelerator fft work buffer */ acc_fft_work_buf_ = mdarray(work_size, memory_t::device, 
"FFT3D.acc_fft_work_buf_"); - /* set work area for GPUFFT */ - GPUFFT::set_work_area(acc_fft_plan_xy_, acc_fft_work_buf_.at(memory_t::device)); + /* set work area for gpufft */ + gpufft::set_work_area(acc_fft_plan_xy_, acc_fft_work_buf_.at(memory_t::device)); if (gvp__.gvec().bare()) { - GPUFFT::set_work_area(acc_fft_plan_z_gvec_, acc_fft_work_buf_.at(memory_t::device)); + gpufft::set_work_area(acc_fft_plan_z_gvec_, acc_fft_work_buf_.at(memory_t::device)); } else { - GPUFFT::set_work_area(acc_fft_plan_z_gkvec_, acc_fft_work_buf_.at(memory_t::device)); + gpufft::set_work_area(acc_fft_plan_z_gkvec_, acc_fft_work_buf_.at(memory_t::device)); } #endif fft_buffer_aux1_.allocate(memory_t::device); diff --git a/src/SDDK/linalg.hpp b/src/SDDK/linalg.hpp index 506aaa1d0..0b4e113f0 100644 --- a/src/SDDK/linalg.hpp +++ b/src/SDDK/linalg.hpp @@ -87,7 +87,11 @@ inline void linalg2::gemm(char transa, char transb, ftn_int m, ftn_i break; } case linalg_t::gpublas: { +#ifdef __GPU gpublas::dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, sid()); +#else + throw std::runtime_error("not compiled with GPU blas support!"); +#endif break; } case linalg_t::cublasxt: { @@ -126,10 +130,14 @@ inline void linalg2::gemm(char transa, char transb, ftn_int break; } case linalg_t::gpublas: { +#ifdef __GPU gpublas::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(beta), reinterpret_cast(C), ldc, sid()); +#else + throw std::runtime_error("not compiled with GPU blas support!"); +#endif break; } @@ -164,7 +172,11 @@ inline void linalg2::ger(ftn_int m, ftn_int n, ftn_double const* alp break; } case linalg_t::gpublas: { +#ifdef __GPU gpublas::dger(m, n, alpha, x, incx, y, incy, A, lda, sid()); +#else + throw std::runtime_error("not compiled with GPU blas support!"); +#endif break; } case linalg_t::cublasxt: { @@ -189,7 +201,11 @@ inline void linalg2::trmm(char side, char uplo, char transa, ftn_int break; } case 
linalg_t::gpublas: { +#ifdef __GPU gpublas::dtrmm(side, uplo, transa, 'N', m, n, alpha, A, lda, B, ldb); +#else + throw std::runtime_error("not compiled with GPU blas support!"); +#endif break; } case linalg_t::cublasxt: { @@ -219,8 +235,12 @@ inline void linalg2::trmm(char side, char uplo, char transa, break; } case linalg_t::gpublas: { +#ifdef __GPU gpublas::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb); +#else + throw std::runtime_error("not compiled with GPU blas support!"); +#endif break; } case linalg_t::cublasxt: { From 419fc7b6148577c2ef12f2c0bb46b3cc98fcfec3 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Fri, 1 Mar 2019 11:12:30 +0100 Subject: [PATCH 12/28] added hipblas_port library to provide complex type support for select blas functions on AMD gpus --- CMakeLists.txt | 4 +- cmake/modules/FindROCM.cmake | 2 +- src/SDDK/GPU/hipblas_interface.hpp | 35 +- src/SDDK/GPU/hipblas_port/CMakeLists.txt | 22 + src/SDDK/GPU/hipblas_port/hipblas_port.h | 115 +++ .../GPU/hipblas_port/hipblas_port.hip.cpp | 189 +++++ src/SDDK/GPU/hipblas_port/rocblas_port.h | 116 +++ .../hipblas_port/rocblas_port/definitions.h | 115 +++ .../GPU/hipblas_port/rocblas_port/handle.h | 246 ++++++ .../rocblas_port/port_helper_func.h | 61 ++ .../rocblas_port/port_hip_roc_translation.h | 258 ++++++ .../GPU/hipblas_port/rocblas_port/reduction.h | 313 ++++++++ .../hipblas_port/rocblas_port/rocblas-types.h | 151 ++++ .../GPU/hipblas_port/rocblas_port/status.h | 64 ++ .../GPU/hipblas_port/rocblas_port/utility.h | 151 ++++ .../hipblas_port/rocblas_port_axpy.hip.cpp | 100 +++ .../hipblas_port/rocblas_port_gemm.hip.cpp | 401 ++++++++++ .../hipblas_port/rocblas_port_gemv.hip.cpp | 374 +++++++++ .../GPU/hipblas_port/rocblas_port_ger.hip.cpp | 108 +++ .../hipblas_port/rocblas_port_trmm.hip.cpp | 744 ++++++++++++++++++ 20 files changed, 3551 insertions(+), 18 deletions(-) create mode 100644 src/SDDK/GPU/hipblas_port/CMakeLists.txt 
create mode 100644 src/SDDK/GPU/hipblas_port/hipblas_port.h create mode 100644 src/SDDK/GPU/hipblas_port/hipblas_port.hip.cpp create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/definitions.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/handle.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/port_hip_roc_translation.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/reduction.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/rocblas-types.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/status.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port/utility.h create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port_axpy.hip.cpp create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port_ger.hip.cpp create mode 100644 src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cebc6449d..fd7b066e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,8 @@ if(USE_ROCM) add_definitions("-D__ROCM") add_definitions(${ROCM_DEFINITIONS}) include_directories(${ROCM_INCLUDE_DIRS}) + add_subdirectory(src/SDDK/GPU/hipblas_port) + include_directories(src/SDDK/GPU/hipblas_port) endif() # add required libraries @@ -231,7 +233,7 @@ MACRO(SIRIUS_SETUP_TARGET _target) endif() if(USE_ROCM) - target_link_libraries(${_target} PRIVATE sirius_rocm sirius_cpp ${ROCM_LIBRARIES}) + target_link_libraries(${_target} PRIVATE sirius_rocm sirius_cpp hipblas_port ${ROCM_LIBRARIES}) endif() ENDMACRO() diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake index 70f3ea06d..e5072af27 100644 --- a/cmake/modules/FindROCM.cmake +++ b/cmake/modules/FindROCM.cmake @@ -360,7 +360,7 @@ 
macro(rocm_hip_add_library) # generate flags to use set(_ROCM_STD_FLAGS ${HIP_LIB_FLAGS} ${ROCM_HIPCC_FLAGS}) - list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX ^-std=) + list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX -std=) set(_ROCM_FLAGS ${HIP_LIB_FLAGS}) if(CMAKE_CXX_STANDARD AND NOT _ROCM_STD_FLAGS) list(APPEND _ROCM_FLAGS -std=c++${CMAKE_CXX_STANDARD}) diff --git a/src/SDDK/GPU/hipblas_interface.hpp b/src/SDDK/GPU/hipblas_interface.hpp index ed7ab6a21..d1da39e15 100644 --- a/src/SDDK/GPU/hipblas_interface.hpp +++ b/src/SDDK/GPU/hipblas_interface.hpp @@ -30,6 +30,7 @@ #include #include #include "acc.hpp" +#include "hipblas_port.h" namespace hipblas { @@ -231,12 +232,13 @@ inline void dgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, inline void dtrmm(char side__, char uplo__, char transa__, char diag__, int m__, int n__, double const* alpha__, double const* A__, int lda__, double* B__, int ldb__) { - throw std::runtime_error("dtrmm not implemented in hipblas with ROCM!"); - // hipblasSideMode_t side = get_hipblasSideMode_t(side__); - // hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); - // hipblasOperation_t transa = get_hipblasOperation_t(transa__); - // hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); - // CALL_HIPBLAS(hipblasDtrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, B__, ldb__, B__, ldb__)); + // throw std::runtime_error("dtrmm not implemented in hipblas with ROCM!"); + hipblasSideMode_t side = get_hipblasSideMode_t(side__); + hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); + hipblasOperation_t transa = get_hipblasOperation_t(transa__); + hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); + CALL_HIPBLAS(hipblas_port_Dtrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, + B__, ldb__, B__, ldb__)); } inline void ztrmm(char side__, @@ -251,12 +253,13 @@ inline void ztrmm(char side__, hipDoubleComplex* B__, int ldb__) { - throw std::runtime_error("ztrmm 
not implemented in hipblas with ROCM!"); - // hipblasSideMode_t side = get_hipblasSideMode_t(side__); - // hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); - // hipblasOperation_t transa = get_hipblasOperation_t(transa__); - // hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); - // CALL_HIPBLAS(hipblasZtrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, B__, ldb__, B__, ldb__)); + // throw std::runtime_error("ztrmm not implemented in hipblas with ROCM!"); + hipblasSideMode_t side = get_hipblasSideMode_t(side__); + hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); + hipblasOperation_t transa = get_hipblasOperation_t(transa__); + hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); + CALL_HIPBLAS(hipblas_port_Ztrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, + B__, ldb__, B__, ldb__)); } inline void dger(int m, @@ -284,8 +287,8 @@ inline void zgeru(int m, int lda, int stream_id) { - throw std::runtime_error("zgeru not implemented in hipblas with ROCM!"); - // CALL_HIPBLAS(hipblasZgeru, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); + // throw std::runtime_error("zgeru not implemented in hipblas with ROCM!"); + CALL_HIPBLAS(hipblas_port_Zgeru, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); } inline void zaxpy(int n__, @@ -295,8 +298,8 @@ inline void zaxpy(int n__, hipDoubleComplex* y__, int incy__) { - throw std::runtime_error("zaxpy not implemented in hipblas with ROCM!"); - // CALL_HIPBLAS(hipblasZaxpy, (null_stream_handle(), n__, alpha__, x__, incx__, y__, incy__)); + // throw std::runtime_error("zaxpy not implemented in hipblas with ROCM!"); + CALL_HIPBLAS(hipblas_port_Zaxpy, (null_stream_handle(), n__, alpha__, x__, incx__, y__, incy__)); } diff --git a/src/SDDK/GPU/hipblas_port/CMakeLists.txt b/src/SDDK/GPU/hipblas_port/CMakeLists.txt new file mode 100644 index 000000000..6c279732d --- /dev/null +++ 
b/src/SDDK/GPU/hipblas_port/CMakeLists.txt @@ -0,0 +1,22 @@ + +if (BUILD_HIPBLAS_TESTS) + add_subdirectory(ext/googletest) + add_executable(test_hipblas tests/main.cpp tests/gemv_test.cpp tests/gemm_test.cpp tests/trmm_test.cpp tests/ger_test.cpp tests/axpy_test.cpp) + target_link_libraries(run_tests ${ROCM_LIBRARIES} hipblas_port gtest_main) +endif() + + +# set(DEFINITIONS_PROP "$") +# set(DEFINITIONS_GENERATOR "$<$:-D$>") +# set(INCLUDE_DIR_PROP "$") +# set(INCLUDE_DIR_GENERATOR "$<$:-I$>") + +rocm_hip_add_library(hipblas_port SHARED rocblas_port_gemv.hip.cpp hipblas_port.hip.cpp rocblas_port_gemm.hip.cpp rocblas_port_trmm.hip.cpp rocblas_port_ger.hip.cpp rocblas_port_axpy.hip.cpp + FLAGS "-Wno-macro-redefined -std=c++14" INCLUDE_DIRS ${ROCM_INCLUDE_DIRS}) + +if (BUILD_HIPBLAS_TESTS) + add_subdirectory(ext/googletest) + add_executable(test_hipblas tests/main.cpp tests/gemv_test.cpp tests/gemm_test.cpp tests/trmm_test.cpp tests/ger_test.cpp tests/axpy_test.cpp) + target_link_libraries(run_tests ${ROCM_LIBRARIES} hipblas_port gtest_main) +endif() + diff --git a/src/SDDK/GPU/hipblas_port/hipblas_port.h b/src/SDDK/GPU/hipblas_port/hipblas_port.h new file mode 100644 index 000000000..3eaa6e9c4 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/hipblas_port.h @@ -0,0 +1,115 @@ +#ifndef _HIPBLAS_PORT_H_ +#define _HIPBLAS_PORT_H_ + +#include +#include +#include + +/* + * GEMV + */ +hipblasStatus_t hipblas_port_Sgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float* alpha, + const float* A, int lda, const float* x, int incx, const float* beta, float* y, + int incy); + +hipblasStatus_t hipblas_port_Dgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const double* alpha, + const double* A, int lda, const double* x, int incx, const double* beta, double* y, + int incy); + +hipblasStatus_t hipblas_port_Cgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, int lda, 
+ const hipFloatComplex* x, int incx, const hipFloatComplex* beta, hipFloatComplex* y, + int incy); + +hipblasStatus_t hipblas_port_Zgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, int lda, + const hipDoubleComplex* x, int incx, const hipDoubleComplex* beta, + hipDoubleComplex* y, int incy); + +/* + * GEMM + */ +hipblasStatus_t hipblas_port_Sgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, + int m, int n, int k, const float *alpha, + const float *A, int lda, + const float *B, int ldb, + const float *beta, + float *C, int ldc); + +hipblasStatus_t hipblas_port_Dgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, + int m, int n, int k, const double *alpha, + const double *A, int lda, + const double *B, int ldb, + const double *beta, + double *C, int ldc); + +hipblasStatus_t hipblas_port_Cgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, + int m, int n, int k, const hipFloatComplex *alpha, + const hipFloatComplex *A, int lda, + const hipFloatComplex *B, int ldb, + const hipFloatComplex *beta, + hipFloatComplex *C, int ldc); + +hipblasStatus_t hipblas_port_Zgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, + int m, int n, int k, const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const hipDoubleComplex *beta, + hipDoubleComplex *C, int ldc); + +/* + * TRMM + */ + +hipblasStatus_t hipblas_port_Strmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, const float* alpha, + const float* A, int lda, const float* B, int ldb, float* C, int ldc); + +hipblasStatus_t hipblas_port_Dtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, const 
double* alpha, + const double* A, int lda, const double* B, int ldb, double* C, int ldc); + +hipblasStatus_t hipblas_port_Ctrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, int lda, + const hipFloatComplex* B, int ldb, hipFloatComplex* C, int ldc); + +hipblasStatus_t hipblas_port_Ztrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, int lda, + const hipDoubleComplex* B, int ldb, hipDoubleComplex* C, int ldc); + + + +/* + * GER + */ +hipblasStatus_t hipblas_port_Sger(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, + const float* y, int incy, float* A, int lda); + +hipblasStatus_t hipblas_port_Dger(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, + const double* y, int incy, double* A, int lda); + +hipblasStatus_t hipblas_port_Cgeru(hipblasHandle_t handle, int m, int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, int incx, const hipFloatComplex* y, int incy, + hipFloatComplex* A, int lda); + +hipblasStatus_t hipblas_port_Zgeru(hipblasHandle_t handle, int m, int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, int incx, const hipDoubleComplex* y, int incy, + hipDoubleComplex* A, int lda); + +/* + * AXPY + */ +hipblasStatus_t hipblas_port_Saxpy(hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, + float* y, int incy); + +hipblasStatus_t hipblas_port_Daxpy(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, + double* y, int incy); + +hipblasStatus_t hipblas_port_Caxpy(hipblasHandle_t handle, int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, int incx, hipFloatComplex* y, int incy); + +hipblasStatus_t 
hipblas_port_Zaxpy(hipblasHandle_t handle, int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, int incx, hipDoubleComplex* y, int incy); +#endif diff --git a/src/SDDK/GPU/hipblas_port/hipblas_port.hip.cpp b/src/SDDK/GPU/hipblas_port/hipblas_port.hip.cpp new file mode 100644 index 000000000..966aced2c --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/hipblas_port.hip.cpp @@ -0,0 +1,189 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#include "rocblas_port.h" +#include +#include "rocblas_port/port_hip_roc_translation.h" + +hipblasStatus_t hipblas_port_Sgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float* alpha, + const float* A, int lda, const float* x, int incx, const float* beta, float* y, + int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_sgemv((rocblas_handle)handle, hipOperationToHCCOperation(trans), m, n, + alpha, A, lda, x, incx, beta, y, incy)); +} + +hipblasStatus_t hipblas_port_Dgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const double* alpha, + const double* A, int lda, const double* x, int incx, const double* beta, double* y, + int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_dgemv((rocblas_handle)handle, hipOperationToHCCOperation(trans), m, n, + alpha, A, lda, x, incx, beta, y, incy)); +} + +hipblasStatus_t hipblas_port_Cgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, int lda, + const hipFloatComplex* x, int incx, const hipFloatComplex* beta, hipFloatComplex* y, + int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_cgemv((rocblas_handle)handle, hipOperationToHCCOperation(trans), m, n, + alpha, A, lda, x, incx, beta, y, incy)); +} + +hipblasStatus_t hipblas_port_Zgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, int lda, + const hipDoubleComplex* x, int incx, const hipDoubleComplex* beta, + hipDoubleComplex* y, int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_zgemv((rocblas_handle)handle, hipOperationToHCCOperation(trans), m, n, + alpha, A, lda, x, incx, beta, y, incy)); +} + +hipblasStatus_t hipblas_port_Sgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, + int n, int k, const float* alpha, const float* A, int 
lda, const float* B, int ldb, + const float* beta, float* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_sgemm((rocblas_handle)handle, hipOperationToHCCOperation(transa), + hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, + ldb, beta, C, ldc)); +} + +hipblasStatus_t hipblas_port_Dgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, + int n, int k, const double* alpha, const double* A, int lda, const double* B, + int ldb, const double* beta, double* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_dgemm((rocblas_handle)handle, hipOperationToHCCOperation(transa), + hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, + ldb, beta, C, ldc)); +} + +hipblasStatus_t hipblas_port_Cgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, + int n, int k, const hipFloatComplex* alpha, const hipFloatComplex* A, int lda, + const hipFloatComplex* B, int ldb, const hipFloatComplex* beta, hipFloatComplex* C, + int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_cgemm((rocblas_handle)handle, hipOperationToHCCOperation(transa), + hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, + ldb, beta, C, ldc)); +} + +hipblasStatus_t hipblas_port_Zgemm(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, + int n, int k, const hipDoubleComplex* alpha, const hipDoubleComplex* A, int lda, + const hipDoubleComplex* B, int ldb, const hipDoubleComplex* beta, + hipDoubleComplex* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_zgemm((rocblas_handle)handle, hipOperationToHCCOperation(transa), + hipOperationToHCCOperation(transb), m, n, k, alpha, A, lda, B, + ldb, beta, C, ldc)); +} + +hipblasStatus_t hipblas_port_Strmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, const float* alpha, + const float* A, int lda, const float* B, 
int ldb, float* C, int ldc) +{ + + return rocBLASStatusToHIPStatus(rocblas_port_strmm( + (rocblas_handle)handle, hipSideToHCCSide(side), hipFillToHCCFill(uplo), hipOperationToHCCOperation(trans), + hipDiagonalToHCCDiagonal(diag), m, n, alpha, A, lda, B, ldb, C, ldc)); +} + +hipblasStatus_t hipblas_port_Dtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, const double* alpha, + const double* A, int lda, const double* B, int ldb, double* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_dtrmm( + (rocblas_handle)handle, hipSideToHCCSide(side), hipFillToHCCFill(uplo), hipOperationToHCCOperation(trans), + hipDiagonalToHCCDiagonal(diag), m, n, alpha, A, lda, B, ldb, C, ldc)); +} + +hipblasStatus_t hipblas_port_Ctrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, int lda, + const hipFloatComplex* B, int ldb, hipFloatComplex* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_ctrmm( + (rocblas_handle)handle, hipSideToHCCSide(side), hipFillToHCCFill(uplo), hipOperationToHCCOperation(trans), + hipDiagonalToHCCDiagonal(diag), m, n, alpha, A, lda, B, ldb, C, ldc)); +} + +hipblasStatus_t hipblas_port_Ztrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, + hipblasOperation_t trans, hipblasDiagType_t diag, int m, int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, int lda, + const hipDoubleComplex* B, int ldb, hipDoubleComplex* C, int ldc) +{ + return rocBLASStatusToHIPStatus(rocblas_port_ztrmm( + (rocblas_handle)handle, hipSideToHCCSide(side), hipFillToHCCFill(uplo), hipOperationToHCCOperation(trans), + hipDiagonalToHCCDiagonal(diag), m, n, alpha, A, lda, B, ldb, C, ldc)); +} + + + +/* + * GER + */ +hipblasStatus_t hipblas_port_Sger(hipblasHandle_t handle, int m, int n, const float* 
alpha, const float* x, int incx, + const float* y, int incy, float* A, int lda) +{ + return rocBLASStatusToHIPStatus(rocblas_port_sger((rocblas_handle)handle, m, n, alpha, x, incx, y, incy, A, lda)); +} + +hipblasStatus_t hipblas_port_Dger(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, + const double* y, int incy, double* A, int lda) +{ + return rocBLASStatusToHIPStatus(rocblas_port_dger((rocblas_handle)handle, m, n, alpha, x, incx, y, incy, A, lda)); +} + +hipblasStatus_t hipblas_port_Cgeru(hipblasHandle_t handle, int m, int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, int incx, const hipFloatComplex* y, int incy, + hipFloatComplex* A, int lda) +{ + return rocBLASStatusToHIPStatus(rocblas_port_cgeru((rocblas_handle)handle, m, n, alpha, x, incx, y, incy, A, lda)); +} + +hipblasStatus_t hipblas_port_Zgeru(hipblasHandle_t handle, int m, int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, int incx, const hipDoubleComplex* y, int incy, + hipDoubleComplex* A, int lda) +{ + return rocBLASStatusToHIPStatus(rocblas_port_zgeru((rocblas_handle)handle, m, n, alpha, x, incx, y, incy, A, lda)); +} + +/* + * AXPY + */ +hipblasStatus_t hipblas_port_Saxpy(hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, + float* y, int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_saxpy((rocblas_handle)handle, n, alpha, x, incx, y, incy)); +} + +hipblasStatus_t hipblas_port_Daxpy(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, + double* y, int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_daxpy((rocblas_handle)handle, n, alpha, x, incx, y, incy)); +} + +hipblasStatus_t hipblas_port_Caxpy(hipblasHandle_t handle, int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, int incx, hipFloatComplex* y, int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_caxpy((rocblas_handle)handle, n, alpha, x, incx, y, incy)); +} + +hipblasStatus_t 
hipblas_port_Zaxpy(hipblasHandle_t handle, int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, int incx, hipDoubleComplex* y, int incy) +{ + return rocBLASStatusToHIPStatus(rocblas_port_zaxpy((rocblas_handle)handle, n, alpha, x, incx, y, incy)); +} diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port.h b/src/SDDK/GPU/hipblas_port/rocblas_port.h new file mode 100644 index 000000000..a8156caaf --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port.h @@ -0,0 +1,116 @@ +#ifndef _ROCBLAS_PORT_H_ +#define _ROCBLAS_PORT_H_ + +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/handle.h" + +extern "C" { + +/* + * GEMV + */ +rocblas_status rocblas_port_sgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, + const float* beta, float* y, rocblas_int incy); + +rocblas_status rocblas_port_dgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const double* alpha, const double* A, rocblas_int lda, const double* x, + rocblas_int incx, const double* beta, double* y, rocblas_int incy); + +rocblas_status rocblas_port_cgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, rocblas_int lda, + const hipFloatComplex* x, rocblas_int incx, const hipFloatComplex* beta, + hipFloatComplex* y, rocblas_int incy); + +rocblas_status rocblas_port_zgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, rocblas_int lda, + const hipDoubleComplex* x, rocblas_int incx, const hipDoubleComplex* beta, + hipDoubleComplex* y, rocblas_int incy); + +/* + * GEMM + */ +rocblas_status rocblas_port_sgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const 
float* A, + rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, + rocblas_int ldc); + +rocblas_status rocblas_port_dgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, + rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, + rocblas_int ldc); + +rocblas_status rocblas_port_cgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const hipFloatComplex* alpha, + const hipFloatComplex* A, rocblas_int lda, const hipFloatComplex* B, rocblas_int ldb, + const hipFloatComplex* beta, hipFloatComplex* C, rocblas_int ldc); + +rocblas_status rocblas_port_zgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const hipDoubleComplex* alpha, + const hipDoubleComplex* A, rocblas_int lda, const hipDoubleComplex* B, + rocblas_int ldb, const hipDoubleComplex* beta, hipDoubleComplex* C, rocblas_int ldc); + +/* + * TRMM + */ + +rocblas_status rocblas_port_strmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, + const float* A, rocblas_int lda, const float* B, rocblas_int ldb, float* C, + rocblas_int ldc); + +rocblas_status rocblas_port_dtrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, + const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, + rocblas_int ldc); + +rocblas_status rocblas_port_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* A, rocblas_int lda, const 
hipFloatComplex* B, rocblas_int ldb, + hipFloatComplex* C, rocblas_int ldc); + +rocblas_status rocblas_port_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* A, rocblas_int lda, const hipDoubleComplex* B, + rocblas_int ldb, hipDoubleComplex* C, rocblas_int ldc); +/* + * GER + */ + +rocblas_status rocblas_port_sger(rocblas_handle handle, rocblas_int m, rocblas_int n, const float* alpha, + const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, + rocblas_int lda); + +rocblas_status rocblas_port_dger(rocblas_handle handle, rocblas_int m, rocblas_int n, const double* alpha, + const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, + rocblas_int lda); + +rocblas_status rocblas_port_cgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, rocblas_int incx, const hipFloatComplex* y, + rocblas_int incy, hipFloatComplex* A, rocblas_int lda); + +rocblas_status rocblas_port_zgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, rocblas_int incx, const hipDoubleComplex* y, + rocblas_int incy, hipDoubleComplex* A, rocblas_int lda); + +/* + * AXPY + */ +rocblas_status rocblas_port_saxpy(rocblas_handle handle, rocblas_int n, const float* alpha, const float* x, + rocblas_int incx, float* y, rocblas_int incy); + +rocblas_status rocblas_port_daxpy(rocblas_handle handle, rocblas_int n, const double* alpha, const double* x, + rocblas_int incx, double* y, rocblas_int incy); + +rocblas_status rocblas_port_caxpy(rocblas_handle handle, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, rocblas_int incx, hipFloatComplex* y, + rocblas_int incy); + +rocblas_status rocblas_port_zaxpy(rocblas_handle handle, rocblas_int n, const hipDoubleComplex* alpha, 
+ const hipDoubleComplex* x, rocblas_int incx, hipDoubleComplex* y, + rocblas_int incy); + +} // extern "C" + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/definitions.h b/src/SDDK/GPU/hipblas_port/rocblas_port/definitions.h new file mode 100644 index 000000000..4025a3529 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/definitions.h @@ -0,0 +1,115 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#ifndef DEFINITIONS_H +#define DEFINITIONS_H +#include "rocblas-types.h" +#include "status.h" + +/******************************************************************************* + * Definitions + * this file to not include any others + * thereby it can include top-level definitions included by all + ******************************************************************************/ + +namespace { +// half vectors +typedef _Float16 rocblas_half8 __attribute__((ext_vector_type(8))); +typedef _Float16 rocblas_half2 __attribute__((ext_vector_type(2))); + +#ifndef GOOGLE_TEST // suppress warnings about __device__ when building tests +extern "C" __device__ rocblas_half2 llvm_fma_v2f16(rocblas_half2, + rocblas_half2, + rocblas_half2) __asm("llvm.fma.v2f16"); + +__device__ inline rocblas_half2 +rocblas_fmadd_half2(rocblas_half2 multiplier, rocblas_half2 multiplicand, rocblas_half2 addend) +{ + return llvm_fma_v2f16(multiplier, multiplicand, addend); +} +#endif + +#define RETURN_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + if(TMP_STATUS_FOR_CHECK != hipSuccess) \ + { \ + return get_rocblas_status_for_hip_status(TMP_STATUS_FOR_CHECK); \ + } \ + } while(0) + +#define RETURN_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ + { \ + return TMP_STATUS_FOR_CHECK; \ + } \ + } while(0) + +#define THROW_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + if(TMP_STATUS_FOR_CHECK != hipSuccess) \ + { \ + throw get_rocblas_status_for_hip_status(TMP_STATUS_FOR_CHECK); \ + } \ + } while(0) + +#define THROW_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + 
if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ + { \ + throw TMP_STATUS_FOR_CHECK; \ + } \ + } while(0) + +#define PRINT_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + if(TMP_STATUS_FOR_CHECK != hipSuccess) \ + { \ + fprintf(stderr, \ + "hip error code: %d at %s:%d\n", \ + TMP_STATUS_FOR_CHECK, \ + __FILE__, \ + __LINE__); \ + } \ + } while(0) + +#define PRINT_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ + do \ + { \ + rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ + if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ + { \ + fprintf(stderr, \ + "rocblas error code: %d at %s:%d\n", \ + TMP_STATUS_FOR_CHECK, \ + __FILE__, \ + __LINE__); \ + } \ + } while(0) + +} +#endif // DEFINITIONS_H diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/handle.h b/src/SDDK/GPU/hipblas_port/rocblas_port/handle.h new file mode 100644 index 000000000..08cebcfaa --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/handle.h @@ -0,0 +1,246 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#ifndef HANDLE_H +#define HANDLE_H + +#include +#include +#include "rocblas-types.h" +#include "definitions.h" +#include + +/******************************************************************************* + * \brief rocblas_handle is a structure holding the rocblas library context. + * It must be initialized using rocblas_create_handle() and the returned handle mus + * It should be destroyed at the end using rocblas_destroy_handle(). + * Exactly like CUBLAS, ROCBLAS only uses one stream for one API routine +******************************************************************************/ +namespace { +struct _rocblas_handle +{ + _rocblas_handle(); + ~_rocblas_handle(); + + /******************************************************************************* + * Exactly like CUBLAS, ROCBLAS only uses one stream for one API routine + ******************************************************************************/ + + /******************************************************************************* + * set stream: + This API assumes user has already created a valid stream + Associate the following rocblas API call with this user provided stream + ******************************************************************************/ + rocblas_status set_stream(hipStream_t user_stream) + { + // TODO: check the user_stream valid or not + rocblas_stream = user_stream; + return rocblas_status_success; + } + + /******************************************************************************* + * get stream + ******************************************************************************/ + rocblas_status get_stream(hipStream_t* stream) const + { 
+ *stream = rocblas_stream; + return rocblas_status_success; + } + + // trsm get pointers + void* get_trsm_Y() const { return trsm_Y; } + void* get_trsm_invA() const { return trsm_invA; } + void* get_trsm_invA_C() const { return trsm_invA_C; } + + // trsv get pointers + void* get_trsv_x() const { return trsv_x; } + void* get_trsv_alpha() const { return trsv_alpha; } + + rocblas_int device; + hipDeviceProp_t device_properties; + + // rocblas by default take the system default stream 0 users cannot create + hipStream_t rocblas_stream = 0; + + // default pointer_mode is on host + rocblas_pointer_mode pointer_mode = rocblas_pointer_mode_host; + + // space allocated for trsm + void* trsm_Y = nullptr; + void* trsm_invA = nullptr; + void* trsm_invA_C = nullptr; + + // space allocated for trsv + void* trsv_x = nullptr; + void* trsv_alpha = nullptr; + + // default logging_mode is no logging + static rocblas_layer_mode layer_mode; + + // logging streams + static std::ofstream log_trace_ofs; + static std::ostream* log_trace_os; + static std::ofstream log_bench_ofs; + static std::ostream* log_bench_os; + static std::ofstream log_profile_ofs; + static std::ostream* log_profile_os; + + // static data for startup initialization + static struct init + { + init(); + } handle_init; +}; + + +// work buffer size constants +constexpr size_t WORKBUF_TRSM_A_BLKS = 10; +constexpr size_t WORKBUF_TRSM_B_CHNK = 32000; +constexpr size_t WORKBUF_TRSM_Y_SZ = 32000 * 128 * sizeof(double); +constexpr size_t WORKBUF_TRSM_INVA_SZ = 128 * 128 * 10 * sizeof(double); +constexpr size_t WORKBUF_TRSM_INVA_C_SZ = 128 * 128 * 10 * sizeof(double) / 2; +constexpr size_t WORKBUF_TRSV_X_SZ = 131072 * sizeof(double); +constexpr size_t WORKBUF_TRSV_ALPHA_SZ = sizeof(double); + +/******************************************************************************* + * constructor + ******************************************************************************/ +_rocblas_handle::_rocblas_handle() +{ + // default device 
is active device + THROW_IF_HIP_ERROR(hipGetDevice(&device)); + THROW_IF_HIP_ERROR(hipGetDeviceProperties(&device_properties, device)); + + // rocblas by default take the system default stream 0 users cannot create + + // allocate trsm temp buffers + THROW_IF_HIP_ERROR(hipMalloc(&trsm_Y, WORKBUF_TRSM_Y_SZ)); + THROW_IF_HIP_ERROR(hipMalloc(&trsm_invA, WORKBUF_TRSM_INVA_SZ)); + THROW_IF_HIP_ERROR(hipMalloc(&trsm_invA_C, WORKBUF_TRSM_INVA_C_SZ)); + + // allocate trsv temp buffers + THROW_IF_HIP_ERROR(hipMalloc(&trsv_x, WORKBUF_TRSV_X_SZ)); + THROW_IF_HIP_ERROR(hipMalloc(&trsv_alpha, WORKBUF_TRSV_ALPHA_SZ)); +} + +/******************************************************************************* + * destructor + ******************************************************************************/ +_rocblas_handle::~_rocblas_handle() +{ + if(trsm_Y) + hipFree(trsm_Y); + if(trsm_invA) + hipFree(trsm_invA); + if(trsm_invA_C) + hipFree(trsm_invA_C); + if(trsv_x) + hipFree(trsv_x); + if(trsv_alpha) + hipFree(trsv_alpha); +} + +/******************************************************************************* + * Static handle data + ******************************************************************************/ +rocblas_layer_mode _rocblas_handle::layer_mode = rocblas_layer_mode_none; +std::ofstream _rocblas_handle::log_trace_ofs; +std::ostream* _rocblas_handle::log_trace_os; +std::ofstream _rocblas_handle::log_bench_ofs; +std::ostream* _rocblas_handle::log_bench_os; +std::ofstream _rocblas_handle::log_profile_ofs; +std::ostream* _rocblas_handle::log_profile_os; +_rocblas_handle::init _rocblas_handle::handle_init; + +/** + * @brief Logging function + * + * @details + * open_log_stream Open stream log_os for logging. + * If the environment variable with name environment_variable_name + * is not set, then stream log_os to std::cerr. + * Else open a file at the full logfile path contained in + * the environment variable. 
+ * If opening the file succeeds, stream to the file + * else stream to std::cerr. + * + * @param[in] + * environment_variable_name const char* + * Name of environment variable that contains + * the full logfile path. + * + * @param[out] + * log_os std::ostream*& + * Output stream. Stream to std::cerr if environment_variable_name + * is not set, else set to stream to log_ofs + * + * @param[out] + * log_ofs std::ofstream& + * Output file stream. If log_ofs.is_open()==true, then log_os + * will stream to log_ofs. Else it will stream to std::cerr. + */ + +static void open_log_stream(const char* environment_variable_name, + std::ostream*& log_os, + std::ofstream& log_ofs) + +{ + // By default, output to cerr + log_os = &std::cerr; + + // if environment variable is set, open file at logfile_pathname contained in the + // environment variable + auto logfile_pathname = getenv(environment_variable_name); + if(logfile_pathname) + { + log_ofs.open(logfile_pathname, std::ios_base::trunc); + + // if log_ofs is open, then stream to log_ofs, else log_os is already set to std::cerr + if(log_ofs.is_open()) + log_os = &log_ofs; + } +} + +/******************************************************************************* + * Static runtime initialization + ******************************************************************************/ +_rocblas_handle::init::init() +{ + // set layer_mode from value of environment variable ROCBLAS_LAYER + auto str_layer_mode = getenv("ROCBLAS_LAYER"); + if(str_layer_mode) + { + layer_mode = static_cast(strtol(str_layer_mode, 0, 0)); + + // open log_trace file + if(layer_mode & rocblas_layer_mode_log_trace) + open_log_stream("ROCBLAS_LOG_TRACE_PATH", log_trace_os, log_trace_ofs); + + // open log_bench file + if(layer_mode & rocblas_layer_mode_log_bench) + open_log_stream("ROCBLAS_LOG_BENCH_PATH", log_bench_os, log_bench_ofs); + + // open log_profile file + if(layer_mode & rocblas_layer_mode_log_profile) + open_log_stream("ROCBLAS_LOG_PROFILE_PATH", 
log_profile_os, log_profile_ofs); + } +} + + +} + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h b/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h new file mode 100644 index 000000000..4d76ffa0c --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h @@ -0,0 +1,61 @@ +#ifndef _PORT_HELPER_FUNC_H_ +#define _PORT_HELPER_FUNC_H_ + +#include +#include "rocblas-types.h" + +namespace { + +/* + * Check if real only and cmp value + */ +template +__host__ __device__ inline bool rb_port_cmp_and_real_only(const T& a, const U& val) { return a == val; } + +template +__host__ __device__ inline bool rb_port_cmp_and_real_only(const hipDoubleComplex& a, const T& val) { return a.x == val && a.y == 0; } + +template +__host__ __device__ inline bool rb_port_cmp_and_real_only(const hipFloatComplex& a, const T& val) { return a.x == val && a.y == 0; } + +/* + * Conjugate helper functions + */ +template +struct rb_port_conj_op { + __host__ __device__ static inline T eval(const T& val) { return val; } +}; + +template<> +struct rb_port_conj_op { + __host__ __device__ static inline hipDoubleComplex eval(const hipDoubleComplex& val) { + return hipDoubleComplex(val.x, -val.y); + } +}; + +template<> +struct rb_port_conj_op { + __host__ __device__ static inline hipFloatComplex eval(const hipFloatComplex& val) { + return hipFloatComplex(val.x, -val.y); + } +}; + +/* + * Swap of leading dimension / increment for transposed matrices + */ +template +struct MatrixDim { + __host__ __device__ static inline rocblas_int ld(const rocblas_int& ld, const rocblas_int& inc) { return inc; } + __host__ __device__ static inline rocblas_int inc(const rocblas_int& ld, const rocblas_int& inc) { return ld; } +}; + +template<> +struct MatrixDim { + __host__ __device__ static inline rocblas_int ld(const rocblas_int& ld, const rocblas_int& inc) { return ld; } + __host__ __device__ static inline rocblas_int inc(const rocblas_int& ld, const rocblas_int& inc) 
{ return inc; } +}; + + +} + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/port_hip_roc_translation.h b/src/SDDK/GPU/hipblas_port/rocblas_port/port_hip_roc_translation.h new file mode 100644 index 000000000..fab9a91ca --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/port_hip_roc_translation.h @@ -0,0 +1,258 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ +#ifndef _PORT_HIP_ROC_TRANSLATION_ +#define _PORT_HIP_ROC_TRANSLATION_ + +#include + +namespace { + +rocblas_operation_ hipOperationToHCCOperation(hipblasOperation_t op) +{ + switch (op) { + case HIPBLAS_OP_N: + return rocblas_operation_none; + case HIPBLAS_OP_T: + return rocblas_operation_transpose; + case HIPBLAS_OP_C: + return rocblas_operation_conjugate_transpose; + default: + throw "Non existent OP"; + } +} + +hipblasOperation_t HCCOperationToHIPOperation(rocblas_operation_ op) +{ + switch (op) { + case rocblas_operation_none: + return HIPBLAS_OP_N; + case rocblas_operation_transpose: + return HIPBLAS_OP_T; + case rocblas_operation_conjugate_transpose: + return HIPBLAS_OP_C; + default: + throw "Non existent OP"; + } +} + +rocblas_fill_ hipFillToHCCFill(hipblasFillMode_t fill) +{ + switch (fill) { + case HIPBLAS_FILL_MODE_UPPER: + return rocblas_fill_upper; + case HIPBLAS_FILL_MODE_LOWER: + return rocblas_fill_lower; + case HIPBLAS_FILL_MODE_FULL: + return rocblas_fill_full; + default: + throw "Non existent FILL"; + } +} + +hipblasFillMode_t HCCFillToHIPFill(rocblas_fill_ fill) +{ + switch (fill) { + case rocblas_fill_upper: + return HIPBLAS_FILL_MODE_UPPER; + case rocblas_fill_lower: + return HIPBLAS_FILL_MODE_LOWER; + case rocblas_fill_full: + return HIPBLAS_FILL_MODE_FULL; + default: + throw "Non existent FILL"; + } +} + +rocblas_diagonal_ hipDiagonalToHCCDiagonal(hipblasDiagType_t diagonal) +{ + switch (diagonal) { + case HIPBLAS_DIAG_NON_UNIT: + return rocblas_diagonal_non_unit; + case HIPBLAS_DIAG_UNIT: + return rocblas_diagonal_unit; + default: + throw "Non existent DIAGONAL"; + } +} + +hipblasDiagType_t HCCDiagonalToHIPDiagonal(rocblas_diagonal_ diagonal) +{ + switch (diagonal) { + case rocblas_diagonal_non_unit: + return HIPBLAS_DIAG_NON_UNIT; + case rocblas_diagonal_unit: + return HIPBLAS_DIAG_UNIT; + default: + throw "Non existent DIAGONAL"; + } +} + +rocblas_side_ 
hipSideToHCCSide(hipblasSideMode_t side) +{ + switch (side) { + case HIPBLAS_SIDE_LEFT: + return rocblas_side_left; + case HIPBLAS_SIDE_RIGHT: + return rocblas_side_right; + case HIPBLAS_SIDE_BOTH: + return rocblas_side_both; + default: + throw "Non existent SIDE"; + } +} + +hipblasSideMode_t HCCSideToHIPSide(rocblas_side_ side) +{ + switch (side) { + case rocblas_side_left: + return HIPBLAS_SIDE_LEFT; + case rocblas_side_right: + return HIPBLAS_SIDE_RIGHT; + case rocblas_side_both: + return HIPBLAS_SIDE_BOTH; + default: + throw "Non existent SIDE"; + } +} + +rocblas_pointer_mode HIPPointerModeToRocblasPointerMode(hipblasPointerMode_t mode) +{ + switch (mode) { + case HIPBLAS_POINTER_MODE_HOST: + return rocblas_pointer_mode_host; + + case HIPBLAS_POINTER_MODE_DEVICE: + return rocblas_pointer_mode_device; + + default: + throw "Non existent PointerMode"; + } +} + +hipblasPointerMode_t RocblasPointerModeToHIPPointerMode(rocblas_pointer_mode mode) +{ + switch (mode) { + case rocblas_pointer_mode_host: + return HIPBLAS_POINTER_MODE_HOST; + + case rocblas_pointer_mode_device: + return HIPBLAS_POINTER_MODE_DEVICE; + + default: + throw "Non existent PointerMode"; + } +} + +rocblas_datatype HIPDatatypeToRocblasDatatype(hipblasDatatype_t type) +{ + switch (type) { + case HIPBLAS_R_16F: + return rocblas_datatype_f16_r; + + case HIPBLAS_R_32F: + return rocblas_datatype_f32_r; + + case HIPBLAS_R_64F: + return rocblas_datatype_f64_r; + + case HIPBLAS_C_16F: + return rocblas_datatype_f16_c; + + case HIPBLAS_C_32F: + return rocblas_datatype_f32_c; + + case HIPBLAS_C_64F: + return rocblas_datatype_f64_c; + + default: + throw "Non existant DataType"; + } +} + +hipblasDatatype_t RocblasDatatypeToHIPDatatype(rocblas_datatype type) +{ + switch (type) { + case rocblas_datatype_f16_r: + return HIPBLAS_R_16F; + + case rocblas_datatype_f32_r: + return HIPBLAS_R_32F; + + case rocblas_datatype_f64_r: + return HIPBLAS_R_64F; + + case rocblas_datatype_f16_c: + return HIPBLAS_C_16F; + + case 
rocblas_datatype_f32_c: + return HIPBLAS_C_32F; + + case rocblas_datatype_f64_c: + return HIPBLAS_C_64F; + + default: + throw "Non existant DataType"; + } +} + +rocblas_gemm_algo HIPGemmAlgoToRocblasGemmAlgo(hipblasGemmAlgo_t algo) +{ + switch (algo) { + case HIPBLAS_GEMM_DEFAULT: + return rocblas_gemm_algo_standard; + + default: + throw "Non existant GemmAlgo"; + } +} + +hipblasGemmAlgo_t RocblasGemmAlgoToHIPGemmAlgo(rocblas_gemm_algo algo) +{ + switch (algo) { + case rocblas_gemm_algo_standard: + return HIPBLAS_GEMM_DEFAULT; + + default: + throw "Non existant GemmAlgo"; + } +} + +hipblasStatus_t rocBLASStatusToHIPStatus(rocblas_status_ error) +{ + switch (error) { + case rocblas_status_success: + return HIPBLAS_STATUS_SUCCESS; + case rocblas_status_invalid_handle: + return HIPBLAS_STATUS_NOT_INITIALIZED; + case rocblas_status_not_implemented: + return HIPBLAS_STATUS_NOT_SUPPORTED; + case rocblas_status_invalid_pointer: + return HIPBLAS_STATUS_INVALID_VALUE; + case rocblas_status_invalid_size: + return HIPBLAS_STATUS_INVALID_VALUE; + case rocblas_status_memory_error: + return HIPBLAS_STATUS_ALLOC_FAILED; + case rocblas_status_internal_error: + return HIPBLAS_STATUS_INTERNAL_ERROR; + default: + throw "Unimplemented status"; + } +} + +} + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/reduction.h b/src/SDDK/GPU/hipblas_port/rocblas_port/reduction.h new file mode 100644 index 000000000..e4b4d0455 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/reduction.h @@ -0,0 +1,313 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ +#ifndef REDUCTION_H_ +#define REDUCTION_H_ + +#include "rocblas-types.h" +#include "definitions.h" +#include "handle.h" + +/* + * =========================================================================== + * This file provides common device functions used in various BLAS routines + * =========================================================================== + */ + +// BLAS Level 1 includes routines and functions performing vector-vector +// operations. Most BLAS 1 routines are about reduction: compute the norm, +// calculate the dot product of two vectors, find the maximum/minimum index +// of the element of the vector. As you may have observed, although the computation +// type is different, the core algorithm is the same: scan all elements of the +// vector(s) and reduce to one single result. 
+// +// The reduction algorithm on GPU is called [parallel +// reduction](https://raw.githubusercontent.com/mateuszbuda/GPUExample/master/reduce3.png) +// which is adopted in rocBLAS. At the beginning, all the threads in the thread +// block participate. After each step of reduction (like a tree), the number of +// participating threads decreases by half. At the end of the parallel reduction, +// only one thread (usually thread 0) owns the result in its thread block. +// +// Classically, the BLAS 1 reduction needs more than one GPU kernel to finish, +// because of the lack of global synchronization of thread blocks without exiting +// the kernel. The first kernel gathers partial results and writes them into a temporary +// working buffer. The second kernel finishes the final reduction. +// +// For example, the BLAS 1 routine i*amax finds the index of the maximum absolute +// value element of a vector. In this routine: +// +// Kernel 1: launch as many thread blocks as needed. Each thread block works on a +// subset of the vector. Each thread block uses the parallel reduction to find a +// local index with the maximum absolute value of the subset. There are +// number-of-the-thread-blocks local results. The results are written into a +// temporary working buffer. The working buffer has number-of-the-thread-blocks +// elements. +// +// Kernel 2: launch only one thread block which reads the temporary work buffer and +// reduces it to the final result, still with the parallel reduction. +// +// As you may see, if there is a mechanism to synchronize all the thread blocks +// after the local index is obtained in kernel 1 (without ending the kernel), then +// Kernel 2's computation can be merged into Kernel 1. One such mechanism is called +// atomic operation. However, atomic operation is new and is not used in rocBLAS +// yet. rocBLAS still uses the classic standard parallel reduction right now. 
+ +namespace { +// Recursively compute reduction +template +struct rocblas_reduction_s +{ + __forceinline__ __device__ void operator()(rocblas_int tx, T* x) + { + // Reduce the lower half with the upper half + if(tx < k) + REDUCE{}(x[tx], x[tx + k]); + __syncthreads(); + + // Recurse down with k / 2 + rocblas_reduction_s{}(tx, x); + } +}; + +// leaf node for terminating recursion +template +struct rocblas_reduction_s<0, REDUCE, T> +{ + __forceinline__ __device__ void operator()(rocblas_int tx, T* x) {} +}; + +/*! \brief general parallel reduction + + \details + + @param[in] + n rocblas_int. assume a power of 2 + @param[in] + T element type of vector x + @param[in] + REDUCE reduction functor + @param[in] + tx rocblas_int. thread id + @param[inout] + x pointer storing vector x on the GPU. + usually x is stored in shared memory; + x[0] store the final result. + ********************************************************************/ +template +__attribute__((flatten)) __device__ void rocblas_reduction(rocblas_int tx, T* x) +{ + static_assert(NB > 1 && !(NB & (NB - 1)), "NB must be a power of 2"); + __syncthreads(); + rocblas_reduction_s{}(tx, x); +} + +/*! \brief parallel reduction: sum + + \details + + @param[in] + n rocblas_int. assume a power of 2 + @param[in] + tx rocblas_int. thread id + @param[inout] + x pointer storing vector x on the GPU. + usually x is stored in shared memory; + x[0] store the final result. 
+ ********************************************************************/ +struct rocblas_reduce_sum +{ + template + __forceinline__ __device__ void operator()(T& __restrict__ a, const T& __restrict__ b) + { + a += b; + } +}; + +template +__attribute__((flatten)) __device__ void rocblas_sum_reduce(rocblas_int tx, T* x) +{ + rocblas_reduction(tx, x); +} +// end sum_reduce + +// Identity finalizer +struct rocblas_finalize_identity +{ + template + __forceinline__ __host__ __device__ T&& operator()(T&& x) + { + return std::forward(x); // Perfect identity, preserving the value category + } +}; + +// Emulates value initialization T{}. Allows specialization for certain types. +template +struct default_value +{ + __forceinline__ __host__ __device__ constexpr T operator()() const { return {}; } +}; + +// kernel 1 writes partial results per thread block in workspace; number of partial results is +// blocks +template +__global__ void +rocblas_reduction_kernel_part1(rocblas_int n, const Ti* x, rocblas_int incx, To* workspace) +{ + ssize_t tx = hipThreadIdx_x; + ssize_t tid = hipBlockIdx_x * hipBlockDim_x + tx; + __shared__ To tmp[NB]; + + // bound + if(tid < n) + tmp[tx] = FETCH{}(x[tid * incx], tid); + else + tmp[tx] = default_value{}(); // pad with default value + + rocblas_reduction(tx, tmp); + + if(tx == 0) + workspace[hipBlockIdx_x] = tmp[0]; +} + +// kernel 2 gathers all the partial results in workspace and finishes the final reduction; +// number of threads (NB) loop blocks +template +__global__ void rocblas_reduction_kernel_part2(rocblas_int nblocks, To* workspace, Tr* result) +{ + rocblas_int tx = hipThreadIdx_x; + __shared__ To tmp[NB]; + + if(tx < nblocks) + { + tmp[tx] = workspace[tx]; + + // bound, loop + for(rocblas_int i = tx + NB; i < nblocks; i += NB) + REDUCE{}(tmp[tx], workspace[i]); + } + else + { // pad with default value + tmp[tx] = default_value{}(); + } + + if(nblocks < 32) + { + // no need for parallel reduction + __syncthreads(); + + if(tx == 0) + for(rocblas_int i 
= 1; i < nblocks; i++) + REDUCE{}(tmp[0], tmp[i]); + } + else + { + // parallel reduction + rocblas_reduction(tx, tmp); + } + + // Store result on device or in workspace + if(tx == 0) + *result = FINALIZE{}(tmp[0]); +} + +// At least two kernels are needed to finish the reduction +// kernel 1 writes one partial result per thread block into workspace (blocks partial results) +// kernel 2 gathers all the partial results in workspace and finishes the final reduction. +template +rocblas_status rocblas_reduction_kernel(rocblas_handle __restrict__ handle, + rocblas_int n, + const Ti* x, + rocblas_int incx, + Tr* result, + To* workspace, + rocblas_int blocks) +{ + hipLaunchKernelGGL((rocblas_reduction_kernel_part1), + blocks, + NB, + 0, + handle->rocblas_stream, + n, + x, + incx, + workspace); + + if(handle->pointer_mode == rocblas_pointer_mode_device) + { + hipLaunchKernelGGL((rocblas_reduction_kernel_part2), + 1, + NB, + 0, + handle->rocblas_stream, + blocks, + workspace, + result); + } + else + { + // If in host pointer mode, workspace is converted to Tr* and the result is + // placed there, and then copied from device to host. If To is a class type, + // it must be a standard layout type and its first member must be of type Tr. + static_assert(std::is_standard_layout(), "To must be a standard layout type"); + + if(blocks > 1) + { + hipLaunchKernelGGL((rocblas_reduction_kernel_part2), + 1, + NB, + 0, + handle->rocblas_stream, + blocks, + workspace, + (Tr*)workspace); + } + + if(std::is_same() || blocks > 1) + { + // If FINALIZE is trivial or kernel part2 was called, result is in the + // beginning of workspace[0], and can be copied directly. + RETURN_IF_HIP_ERROR(hipMemcpy(result, workspace, sizeof(Tr), hipMemcpyDeviceToHost)); + } + else + { + // If FINALIZE is not trivial and kernel part2 was not called, then + // workspace[0] needs to be finalized on host. 
+ To res; + RETURN_IF_HIP_ERROR(hipMemcpy(&res, workspace, sizeof(To), hipMemcpyDeviceToHost)); + *result = FINALIZE{}(res); + } + } + + return rocblas_status_success; +} + +} + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/rocblas-types.h b/src/SDDK/GPU/hipblas_port/rocblas_port/rocblas-types.h new file mode 100644 index 000000000..6c38fde07 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/rocblas-types.h @@ -0,0 +1,151 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +/*! \file + * \brief rocblas-types.h defines data types used by rocblas + */ + +#ifndef _ROCBLAS_TYPES_H_ +#define _ROCBLAS_TYPES_H_ + +#include +#include +#include + +namespace { + +// integer types +/*! 
\brief To specify whether int32 or int64 is used + */ +#if defined(rocblas_ILP64) +typedef int64_t rocblas_int; +typedef int64_t rocblas_long; +#else +typedef int32_t rocblas_int; +typedef int64_t rocblas_long; +#endif +// complex types +typedef float2 rocblas_float_complex; +typedef double2 rocblas_double_complex; +// half types +typedef uint16_t rocblas_half; +typedef float2 rocblas_half_complex; + +typedef struct _rocblas_handle* rocblas_handle; + +/* ============================================================================================ */ + +/*! parameter constants. + * numbering is consistent with CBLAS, ACML and most standard C BLAS libraries + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Used to specify whether the matrix is to be transposed or not. */ +typedef enum rocblas_operation_ { + rocblas_operation_none = 111, /**< Operate with the matrix. */ + rocblas_operation_transpose = 112, /**< Operate with the transpose of the matrix. */ + rocblas_operation_conjugate_transpose = + 113 /**< Operate with the conjugate transpose of the matrix. */ +} rocblas_operation; + +/*! \brief Used by the Hermitian, symmetric and triangular matrix + * routines to specify whether the upper or lower triangle is being referenced. + */ +typedef enum rocblas_fill_ { + rocblas_fill_upper = 121, /**< Upper triangle. */ + rocblas_fill_lower = 122, /**< Lower triangle. */ + rocblas_fill_full = 123 +} rocblas_fill; + +/*! \brief It is used by the triangular matrix routines to specify whether the + * matrix is unit triangular. + */ +typedef enum rocblas_diagonal_ { + rocblas_diagonal_non_unit = 131, /**< Non-unit triangular. */ + rocblas_diagonal_unit = 132, /**< Unit triangular. */ +} rocblas_diagonal; + +/*! \brief Indicates the side matrix A is located relative to matrix B during multiplication. */ +typedef enum rocblas_side_ { + rocblas_side_left = 141, /**< Multiply general matrix by symmetric, + Hermitian or triangular matrix on the left. 
*/ + rocblas_side_right = 142, /**< Multiply general matrix by symmetric, + Hermitian or triangular matrix on the right. */ + rocblas_side_both = 143 +} rocblas_side; + +/* ============================================================================================ */ +/** + * @brief rocblas status codes definition + */ +typedef enum rocblas_status_ { + rocblas_status_success = 0, /**< success */ + rocblas_status_invalid_handle = 1, /**< handle not initialized, invalid or null */ + rocblas_status_not_implemented = 2, /**< function is not implemented */ + rocblas_status_invalid_pointer = 3, /**< invalid pointer parameter */ + rocblas_status_invalid_size = 4, /**< invalid size parameter */ + rocblas_status_memory_error = 5, /**< failed internal memory allocation, copy or dealloc */ + rocblas_status_internal_error = 6, /**< other internal library failure */ +} rocblas_status; + +/*! \brief Indicates the precision width of data stored in a blas type. */ +typedef enum rocblas_datatype_ { + rocblas_datatype_f16_r = 150, + rocblas_datatype_f32_r = 151, + rocblas_datatype_f64_r = 152, + rocblas_datatype_f16_c = 153, + rocblas_datatype_f32_c = 154, + rocblas_datatype_f64_c = 155, + rocblas_datatype_i8_r = 160, + rocblas_datatype_u8_r = 161, + rocblas_datatype_i32_r = 162, + rocblas_datatype_u32_r = 163, + rocblas_datatype_i8_c = 164, + rocblas_datatype_u8_c = 165, + rocblas_datatype_i32_c = 166, + rocblas_datatype_u32_c = 167, +} rocblas_datatype; + +/*! \brief Indicates the pointer is device pointer or host pointer */ +typedef enum rocblas_pointer_mode_ { + rocblas_pointer_mode_host = 0, + rocblas_pointer_mode_device = 1 +} rocblas_pointer_mode; + +/*! \brief Indicates if layer is active with bitmask*/ +typedef enum rocblas_layer_mode_ { + rocblas_layer_mode_none = 0b0000000000, + rocblas_layer_mode_log_trace = 0b0000000001, + rocblas_layer_mode_log_bench = 0b0000000010, + rocblas_layer_mode_log_profile = 0b0000000100, +} rocblas_layer_mode; + +/*! 
\brief Indicates if layer is active with bitmask*/ +typedef enum rocblas_gemm_algo_ { + rocblas_gemm_algo_standard = 0b0000000000, +} rocblas_gemm_algo; + +#ifdef __cplusplus +} +#endif + +} + +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/status.h b/src/SDDK/GPU/hipblas_port/rocblas_port/status.h new file mode 100644 index 000000000..f0627eff6 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/status.h @@ -0,0 +1,64 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ +#ifndef STATUS_H_ +#define STATUS_H_ + +#include +#include "rocblas-types.h" +#include "status.h" + +/******************************************************************************* + * \brief convert hipError_t to rocblas_status + * TODO - enumerate library calls to hip runtime, enumerate possible errors from those calls + ******************************************************************************/ + +namespace { +inline rocblas_status get_rocblas_status_for_hip_status(hipError_t status) +{ + switch(status) + { + // success + case hipSuccess: + return rocblas_status_success; + + // internal hip memory allocation + case hipErrorMemoryAllocation: + case hipErrorLaunchOutOfResources: + return rocblas_status_memory_error; + + // user-allocated hip memory + case hipErrorInvalidDevicePointer: // hip memory + return rocblas_status_invalid_pointer; + + // user-allocated device, stream, event + case hipErrorInvalidDevice: + case hipErrorInvalidResourceHandle: + return rocblas_status_invalid_handle; + + // library using hip incorrectly + case hipErrorInvalidValue: + return rocblas_status_internal_error; + + // hip runtime failing + case hipErrorNoDevice: // no hip devices + case hipErrorUnknown: + default: return rocblas_status_internal_error; + } +} + +} +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/utility.h b/src/SDDK/GPU/hipblas_port/rocblas_port/utility.h new file mode 100644 index 000000000..006b48096 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/utility.h @@ -0,0 +1,151 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#ifndef UTILITY_H +#define UTILITY_H + +#include "rocblas-types.h" + +namespace { + +#ifndef GOOGLE_TEST + +// Load a scalar. If the argument is a pointer, dereference it; otherwise copy +// it. Allows the same kernels to be used for host and device scalars. 
+ +// For host scalars +template +__forceinline__ __device__ __host__ T load_scalar(T x) +{ + return x; +} + +// For device scalars +template +__forceinline__ __device__ __host__ T load_scalar(const T* xp) +{ + return *xp; +} + +// For rocblas_half2, we broadcast a fp16 across two halves +template <> +__forceinline__ __device__ __host__ rocblas_half2 load_scalar(const rocblas_half2* xp) +{ + auto x = *reinterpret_cast(xp); + return {x, x}; +} + +#endif // GOOGLE_TEST + +inline bool isAligned(const void* pointer, size_t byte_count) +{ + return reinterpret_cast(pointer) % byte_count == 0; +} + +// clang-format off +// return letter N,T,C in place of rocblas_operation enum +constexpr auto rocblas_transpose_letter(rocblas_operation trans) +{ + switch(trans) + { + case rocblas_operation_none: return 'N'; + case rocblas_operation_transpose: return 'T'; + case rocblas_operation_conjugate_transpose: return 'C'; + default: return ' '; + } +} + +// return letter L, R, B in place of rocblas_side enum +constexpr auto rocblas_side_letter(rocblas_side side) +{ + switch(side) + { + case rocblas_side_left: return 'L'; + case rocblas_side_right: return 'R'; + case rocblas_side_both: return 'B'; + default: return ' '; + } +} + +// return letter U, L, B in place of rocblas_fill enum +constexpr auto rocblas_fill_letter(rocblas_fill fill) +{ + switch(fill) + { + case rocblas_fill_upper: return 'U'; + case rocblas_fill_lower: return 'L'; + case rocblas_fill_full: return 'F'; + default: return ' '; + } +} + +// return letter N, U in place of rocblas_diagonal enum +constexpr auto rocblas_diag_letter(rocblas_diagonal diag) +{ + switch(diag) + { + case rocblas_diagonal_non_unit: return 'N'; + case rocblas_diagonal_unit: return 'U'; + default: return ' '; + } +} + +// return precision string for rocblas_datatype +constexpr auto rocblas_datatype_string(rocblas_datatype type) +{ + switch(type) + { + case rocblas_datatype_f16_r: return "f16_r"; + case rocblas_datatype_f32_r: return "f32_r"; + 
case rocblas_datatype_f64_r: return "f64_r"; + case rocblas_datatype_f16_c: return "f16_c"; // complex half: suffix is _c like every other complex type + case rocblas_datatype_f32_c: return "f32_c"; + case rocblas_datatype_f64_c: return "f64_c"; + case rocblas_datatype_i8_r: return "i8_r"; + case rocblas_datatype_u8_r: return "u8_r"; + case rocblas_datatype_i32_r: return "i32_r"; + case rocblas_datatype_u32_r: return "u32_r"; + case rocblas_datatype_i8_c: return "i8_c"; + case rocblas_datatype_u8_c: return "u8_c"; + case rocblas_datatype_i32_c: return "i32_c"; + case rocblas_datatype_u32_c: return "u32_c"; + default: return "invalid"; + } +} + +// return precision string for data type +template constexpr char rocblas_precision_string [] = "invalid"; +template <> constexpr char rocblas_precision_string[] = "f16_r"; +template <> constexpr char rocblas_precision_string[] = "f32_r"; +template <> constexpr char rocblas_precision_string[] = "f64_r"; +template <> constexpr char rocblas_precision_string[] = "i8_r"; +template <> constexpr char rocblas_precision_string[] = "u8_r"; +template <> constexpr char rocblas_precision_string[] = "i32_r"; +template <> constexpr char rocblas_precision_string[] = "u32_r"; +template <> constexpr char rocblas_precision_string[] = "f32_c"; +template <> constexpr char rocblas_precision_string[] = "f64_c"; +#if 0 // Not implemented +template <> constexpr char rocblas_precision_string[] = "f16_c"; +template <> constexpr char rocblas_precision_string[] = "i8_c"; +template <> constexpr char rocblas_precision_string[] = "u8_c"; +template <> constexpr char rocblas_precision_string[] = "i32_c"; +template <> constexpr char rocblas_precision_string[] = "u32_c"; +#endif + +} +// clang-format on +#endif diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_axpy.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_axpy.hip.cpp new file mode 100644 index 000000000..56fc22c77 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_axpy.hip.cpp @@ -0,0 +1,100 @@ +/*
************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#include +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/status.h" +#include "rocblas_port/definitions.h" +#include "rocblas_port/handle.h" +#include "rocblas_port/utility.h" +#include "rocblas_port/reduction.h" +#include "rocblas_port/port_helper_func.h" + +namespace { + +template +__global__ void rocblas_axpy_kernel(const rocblas_int n, U alpha_device_host, const T* __restrict__ x, + const rocblas_int incx, T* y, const rocblas_int incy) +{ + auto alpha = load_scalar(alpha_device_host); + + auto row = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + + if (row < n) // one thread per element; the host wrapper guarantees n > 0 + y[row * incy] += alpha * x[row * incx]; +} + +template +rocblas_status rocblas_axpy(rocblas_handle handle, rocblas_int n, const T* alpha, const T* x, rocblas_int incx, T* y, + rocblas_int incy) +{ + if (!handle) + return rocblas_status_invalid_handle; + if (!alpha) + return rocblas_status_invalid_pointer; + if (n <= 0) // quick return; a negative n must never reach the launch (grid size / unsigned row < n compare) + return rocblas_status_success; + if (!x || !y) + return rocblas_status_invalid_pointer; + + dim3 threads(256); + dim3 grid(n / 256 + (n % 256 != 0)); + hipStream_t rocblas_stream = handle->rocblas_stream; + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL((rocblas_axpy_kernel), grid, threads, 0, rocblas_stream, n, alpha, x, incx, y, incy); + } else { + hipLaunchKernelGGL((rocblas_axpy_kernel), grid, threads, 0, rocblas_stream, n, *alpha, x, incx, y, incy); + } + return rocblas_status_success; +} + +} // namespace + +/* + * =========================================================================== + * C wrapper + * =========================================================================== + */ + +extern "C" { + +rocblas_status rocblas_port_saxpy(rocblas_handle handle, rocblas_int n, const float* alpha, const float* x, + rocblas_int incx, float* y, rocblas_int incy) +{ + return rocblas_axpy(handle, n, alpha, x, incx, y, incy); +} +
+rocblas_status rocblas_port_daxpy(rocblas_handle handle, rocblas_int n, const double* alpha, const double* x, + rocblas_int incx, double* y, rocblas_int incy) +{ + return rocblas_axpy(handle, n, alpha, x, incx, y, incy); +} + +rocblas_status rocblas_port_caxpy(rocblas_handle handle, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, rocblas_int incx, hipFloatComplex* y, rocblas_int incy) +{ + return rocblas_axpy(handle, n, alpha, x, incx, y, incy); +} +rocblas_status rocblas_port_zaxpy(rocblas_handle handle, rocblas_int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, rocblas_int incx, hipDoubleComplex* y, rocblas_int incy) +{ + return rocblas_axpy(handle, n, alpha, x, incx, y, incy); +} + +} // extern "C" diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp new file mode 100644 index 000000000..6c42ff859 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp @@ -0,0 +1,401 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#include +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/status.h" +#include "rocblas_port/definitions.h" +#include "rocblas_port/handle.h" +#include "rocblas_port/utility.h" +#include "rocblas_port/reduction.h" +#include "rocblas_port/port_helper_func.h" + +namespace { + +template +__global__ void gemmn_kernel(rocblas_int m, rocblas_int n, rocblas_int k, U alpha_device_host, const T* __restrict__ A, + rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, U beta_device_host, T* C, + rocblas_int ldc) +{ + auto alpha = load_scalar(alpha_device_host); + auto beta = load_scalar(beta_device_host); + rocblas_int num_threads = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; + + if (DIM_X * DIM_Y != num_threads) + return; // need to launch exactly the same number of threads as template parameters indicate + + rocblas_int thread_id = hipThreadIdx_x + hipThreadIdx_y * hipBlockDim_x; + + // threads are all configured locally + rocblas_int tx = thread_id % DIM_X; + rocblas_int ty = thread_id / DIM_X; + + rocblas_int ind; + + __shared__ T sdata[DIM_X * 4 * DIM_Y]; + + T res_A[4]; // micro tile is 4 * 4 + T res_B[4]; + + res_A[0] = res_B[0] = T(0.0); + res_A[1] = res_B[1] = T(0.0); + res_A[2] = res_B[2] = T(0.0); + res_A[3] = res_B[3] = T(0.0); + + ind = hipBlockIdx_x * DIM_X * 4 + tx; + + rocblas_int k_tail = k % (4 * DIM_Y); + rocblas_int col = ty * 4; + rocblas_int col_B = hipBlockIdx_y; + + B += col_B * MatrixDim::ld(ldb, 1); + + for (col = ty * 4; col < (k - k_tail); col += 4 * DIM_Y) { + res_B[0] = rb_port_conj_op::eval(B[(col + 0) * MatrixDim::inc(ldb, 1)]); + res_B[1] =
rb_port_conj_op::eval(B[(col + 1) * MatrixDim::inc(ldb, 1)]); + res_B[2] = rb_port_conj_op::eval(B[(col + 2) * MatrixDim::inc(ldb, 1)]); + res_B[3] = rb_port_conj_op::eval(B[(col + 3) * MatrixDim::inc(ldb, 1)]); + + if (ind < m) { + res_A[0] += A[ind + (col + 0) * lda] * res_B[0]; + res_A[0] += A[ind + (col + 1) * lda] * res_B[1]; + res_A[0] += A[ind + (col + 2) * lda] * res_B[2]; + res_A[0] += A[ind + (col + 3) * lda] * res_B[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += A[ind + DIM_X + (col + 0) * lda] * res_B[0]; + res_A[1] += A[ind + DIM_X + (col + 1) * lda] * res_B[1]; + res_A[1] += A[ind + DIM_X + (col + 2) * lda] * res_B[2]; + res_A[1] += A[ind + DIM_X + (col + 3) * lda] * res_B[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda] * res_B[0]; + res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda] * res_B[1]; + res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda] * res_B[2]; + res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda] * res_B[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda] * res_B[0]; + res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda] * res_B[1]; + res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda] * res_B[2]; + res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda] * res_B[3]; + } + } + + // if n is not multiple of (DIM_Y * 4) + if (k_tail > 0) { + res_B[0] = + (col + 0 < k) ? rb_port_conj_op::eval(B[(col + 0) * MatrixDim::inc(ldb, 1)]) : T(0); + res_B[1] = + (col + 1 < k) ? rb_port_conj_op::eval(B[(col + 1) * MatrixDim::inc(ldb, 1)]) : T(0); + res_B[2] = + (col + 2 < k) ? rb_port_conj_op::eval(B[(col + 2) * MatrixDim::inc(ldb, 1)]) : T(0); + res_B[3] = + (col + 3 < k) ? 
rb_port_conj_op::eval(B[(col + 3) * MatrixDim::inc(ldb, 1)]) : T(0); + + if (ind < m) { + res_A[0] += A[ind + (col + 0) * lda * (col + 0 < k)] * res_B[0]; + res_A[0] += A[ind + (col + 1) * lda * (col + 1 < k)] * res_B[1]; + res_A[0] += A[ind + (col + 2) * lda * (col + 2 < k)] * res_B[2]; + res_A[0] += A[ind + (col + 3) * lda * (col + 3 < k)] * res_B[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += A[ind + DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; + res_A[1] += A[ind + DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; + res_A[1] += A[ind + DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; + res_A[1] += A[ind + DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; + res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; + res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; + res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; + res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; + res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; + res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; + } + } + + sdata[tx + ty * DIM_X * 4] = res_A[0]; + sdata[tx + DIM_X + ty * DIM_X * 4] = res_A[1]; + sdata[tx + 2 * DIM_X + ty * DIM_X * 4] = res_A[2]; + sdata[tx + 3 * DIM_X + ty * DIM_X * 4] = res_A[3]; + + __syncthreads(); + + ind = hipBlockIdx_x * DIM_X * 4 + thread_id; + if (thread_id < DIM_X * 4) { + for (rocblas_int i = 1; i < DIM_Y; i++) + sdata[thread_id] += sdata[thread_id + DIM_X * 4 * i]; + + if (ind < m) + C[ind + col_B * ldc] = alpha * sdata[thread_id] + beta * C[ind + col_B * ldc]; + } +} + +template +__global__ void gemmc_kernel(rocblas_int cols_AT, U alpha_device_host, const T* 
__restrict__ A, rocblas_int lda, + const T* __restrict__ B, rocblas_int ldb, U beta_device_host, T* C, rocblas_int ldc) +{ + auto alpha = load_scalar(alpha_device_host); + auto beta = load_scalar(beta_device_host); + rocblas_int tx = hipThreadIdx_x; + + if (tx < cols_AT) + A += tx; + + rocblas_int col_A = hipBlockIdx_x; + rocblas_int col_B = hipBlockIdx_y; + A += col_A * lda; + B += col_B * MatrixDim::ld(ldb, 1); + + T res(0); + + __shared__ T sdata[NB_X]; + + // partial sums + rocblas_int cols_AT_full = (cols_AT / NB_X) * NB_X; + + for (rocblas_int i = 0; i < cols_AT_full; i += NB_X) + res += rb_port_conj_op::eval(A[i]) * rb_port_conj_op::eval(B[(tx + i) * MatrixDim::inc(ldb, 1)]); + + if (tx + cols_AT_full < cols_AT) + res += rb_port_conj_op::eval(A[cols_AT_full]) * rb_port_conj_op::eval(B[(tx + cols_AT_full) * MatrixDim::inc(ldb, 1)]); + + sdata[tx] = res; + + // tree reduction of partial sums, + if (NB_X > 16) { + rocblas_sum_reduce(tx, sdata); + } else { + __syncthreads(); + + if (tx == 0) { + for (rocblas_int i = 1; i < cols_AT && i < NB_X; i++) + sdata[0] += sdata[i]; + } + + __syncthreads(); + } + + if (tx == 0) + C[col_A + col_B * ldc] = alpha * sdata[0] + beta * C[col_A + col_B * ldc]; +} + +template +constexpr char rocblas_gemm_name[] = "unknown"; +template <> +constexpr char rocblas_gemm_name[] = "rocblas_sgemm"; +template <> +constexpr char rocblas_gemm_name[] = "rocblas_dgemm"; + +/*! \brief BLAS Level 3 API + + \details + xGEMM performs the matrix-matrix operation + + C := alpha*op(A)*op(B) + beta*C, + where op(X) is one of + op(X) = X, op(X) = X**T or op(X) = X**H, + + alpha and beta are scalars, op(A) is an m by k matrix, op(B) is a + k by n matrix and C is an m by n matrix. + + @param[in] + handle rocblas_handle. + handle to the rocblas library context queue. + @param[in] + transa rocblas_operation, selects op(A) + @param[in] + m, n rocblas_int, number of rows / columns of C + @param[in] + k rocblas_int, inner dimension of the product + @param[in] + alpha + specifies the scalar alpha. + @param[in] + A pointer storing matrix A on the GPU.
+ @param[in] + lda rocblas_int + specifies the leading dimension of A. + @param[in] + B pointer storing matrix B on the GPU. + @param[in] + ldb specifies the leading dimension of B. + @param[in] + beta specifies the scalar beta. + @param[in,out] + C pointer storing matrix C on the GPU. + @param[in] + ldc rocblas_int + specifies the leading dimension of C. + + ********************************************************************/ + +template +rocblas_status rocblas_gemm(rocblas_handle handle, rocblas_operation transa, rocblas_int m, + rocblas_int n, rocblas_int k, const T* alpha, const T* A, rocblas_int lda, const T* B, + rocblas_int ldb, const T* beta, T* C, rocblas_int ldc) +{ + if (!handle) + return rocblas_status_invalid_handle; + if (!alpha || !beta) + return rocblas_status_invalid_pointer; + + if (!A || !B || !C) + return rocblas_status_invalid_pointer; + // A is stored m x k when not transposed and k x m otherwise, so its lda bound depends on transa. NOTE(review): ldb < k looks correct only for a non-transposed B - confirm. + if (m < 0 || n < 0 || k < 0 || lda < (transa == rocblas_operation_none ? m : k) || lda < 1 || ldb < k || ldb < 1 || ldc < m || ldc < 1) + return rocblas_status_invalid_size; + + /* + * Quick return if possible.
Not Argument error + */ + if (!m || !n || !k) + return rocblas_status_success; + + hipStream_t rocblas_stream = handle->rocblas_stream; + + if (transa == rocblas_operation_none) { + // GEMMN_DIM_Y must be at least 4, 8 * 8 is very slow only 40Gflop/s + static constexpr int GEMMN_DIM_X = 32; + static constexpr int GEMMN_DIM_Y = 16; + rocblas_int blocks = (m - 1) / (GEMMN_DIM_X * 4) + 1; + + dim3 gemmn_grid(blocks, n); + dim3 gemmn_threads(GEMMN_DIM_X, GEMMN_DIM_Y); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL((gemmn_kernel), gemmn_grid, gemmn_threads, 0, rocblas_stream, + m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0.0) && rb_port_cmp_and_real_only(*beta, 1)) + return rocblas_status_success; + + hipLaunchKernelGGL((gemmn_kernel), gemmn_grid, gemmn_threads, 0, rocblas_stream, + m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + } + } else { + // transpose + // number of columns on the y-dim of the grid, using gemmc because gemmt(transpose) is a + // instance of gemmc (conjugate) + static constexpr int NB = 256; + dim3 gemmc_grid(m, n); + dim3 gemmc_threads(NB); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + if (transa == rocblas_operation_transpose) + hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, gemmc_threads, 0, + rocblas_stream, k, alpha, A, lda, B, ldb, beta, C, ldc); + else + hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, + gemmc_threads, 0, rocblas_stream, k, alpha, A, lda, B, ldb, beta, C, ldc); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0) && rb_port_cmp_and_real_only(*beta, 1)) + return rocblas_status_success; + + if (transa == rocblas_operation_transpose) + hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, gemmc_threads, 0, + rocblas_stream, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + else + hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, + gemmc_threads, 0, rocblas_stream, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + } + } + return 
rocblas_status_success; +} + +} // namespace + +/* + * =========================================================================== + * C wrapper + * =========================================================================== + */ + +extern "C" { + +rocblas_status rocblas_port_sgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* A, + rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, + rocblas_int ldc) +{ + if (transb == rocblas_operation_none) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_conjugate_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else + return rocblas_status_not_implemented; +} + +rocblas_status rocblas_port_dgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, + rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, + rocblas_int ldc) +{ + if (transb == rocblas_operation_none) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_conjugate_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else + return rocblas_status_not_implemented; +} + +rocblas_status rocblas_port_cgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const hipFloatComplex* alpha, + const hipFloatComplex* A, rocblas_int lda, const 
hipFloatComplex* B, rocblas_int ldb, + const hipFloatComplex* beta, hipFloatComplex* C, rocblas_int ldc) +{ + if (transb == rocblas_operation_none) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_conjugate_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else + return rocblas_status_not_implemented; +} + +rocblas_status rocblas_port_zgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const hipDoubleComplex* alpha, + const hipDoubleComplex* A, rocblas_int lda, const hipDoubleComplex* B, + rocblas_int ldb, const hipDoubleComplex* beta, hipDoubleComplex* C, rocblas_int ldc) +{ + if (transb == rocblas_operation_none) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_conjugate_transpose) + return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else + return rocblas_status_not_implemented; +} +} // extern "C" diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp new file mode 100644 index 000000000..5051677a3 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp @@ -0,0 +1,374 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#include +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/status.h" +#include "rocblas_port/definitions.h" +#include "rocblas_port/handle.h" +#include "rocblas_port/utility.h" +#include "rocblas_port/reduction.h" +#include "rocblas_port/port_helper_func.h" + +namespace { + +template +__global__ void gemvn_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, + rocblas_int lda, const T* __restrict__ x, rocblas_int incx, U beta_device_host, T* y, + rocblas_int incy) +{ + auto alpha = load_scalar(alpha_device_host); + auto beta = load_scalar(beta_device_host); + rocblas_int num_threads = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; + + if (DIM_X * DIM_Y != num_threads) + return; // need to launch exactly the same number of threads as template parameters indicate + + rocblas_int thread_id = hipThreadIdx_x + hipThreadIdx_y * hipBlockDim_x; + + // threads are all configurated locally + rocblas_int tx = thread_id % DIM_X; + rocblas_int ty = thread_id / DIM_X; + + rocblas_int ind; + + __shared__ T sdata[DIM_X * 4 * DIM_Y]; + + T res_A[4]; // micor tile is 4 * 4 + T res_x[4]; + + res_A[0] = res_x[0] = T(0.0); + res_A[1] = res_x[0] = T(0.0); + res_A[2] = res_x[0] = T(0.0); + res_A[3] = res_x[0] = T(0.0); + + ind = hipBlockIdx_x * DIM_X * 4 + tx; + + rocblas_int n_tail = n % (4 * DIM_Y); + rocblas_int col = ty * 4; + + for (col = ty * 4; col < (n - n_tail); col += 4 * DIM_Y) { + res_x[0] = x[(col + 0) * incx]; + res_x[1] = x[(col + 1) * incx]; + res_x[2] = x[(col + 2) * incx]; + res_x[3] = x[(col + 3) * incx]; + + if (ind < m) { + res_A[0] += A[ind + (col + 0) * lda] * res_x[0]; + res_A[0] += A[ind + (col + 1) * lda] * res_x[1]; + res_A[0] += A[ind + (col + 2) * lda] * res_x[2]; + res_A[0] += A[ind + (col + 3) * lda] * res_x[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += A[ind + DIM_X + (col + 0) * lda] * res_x[0]; + res_A[1] += 
A[ind + DIM_X + (col + 1) * lda] * res_x[1]; + res_A[1] += A[ind + DIM_X + (col + 2) * lda] * res_x[2]; + res_A[1] += A[ind + DIM_X + (col + 3) * lda] * res_x[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda] * res_x[0]; + res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda] * res_x[1]; + res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda] * res_x[2]; + res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda] * res_x[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda] * res_x[0]; + res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda] * res_x[1]; + res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda] * res_x[2]; + res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda] * res_x[3]; + } + } + + // if n is not multiple of (DIM_Y * 4) + if (n_tail > 0) { + res_x[0] = (col + 0 < n) ? x[(col + 0) * incx] : T(0); + res_x[1] = (col + 1 < n) ? x[(col + 1) * incx] : T(0); + res_x[2] = (col + 2 < n) ? x[(col + 2) * incx] : T(0); + res_x[3] = (col + 3 < n) ? 
x[(col + 3) * incx] : T(0); + + if (ind < m) { + res_A[0] += A[ind + (col + 0) * lda * (col + 0 < n)] * res_x[0]; + res_A[0] += A[ind + (col + 1) * lda * (col + 1 < n)] * res_x[1]; + res_A[0] += A[ind + (col + 2) * lda * (col + 2 < n)] * res_x[2]; + res_A[0] += A[ind + (col + 3) * lda * (col + 3 < n)] * res_x[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += A[ind + DIM_X + (col + 0) * lda * (col + 0 < n)] * res_x[0]; + res_A[1] += A[ind + DIM_X + (col + 1) * lda * (col + 1 < n)] * res_x[1]; + res_A[1] += A[ind + DIM_X + (col + 2) * lda * (col + 2 < n)] * res_x[2]; + res_A[1] += A[ind + DIM_X + (col + 3) * lda * (col + 3 < n)] * res_x[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda * (col + 0 < n)] * res_x[0]; + res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda * (col + 1 < n)] * res_x[1]; + res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda * (col + 2 < n)] * res_x[2]; + res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda * (col + 3 < n)] * res_x[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda * (col + 0 < n)] * res_x[0]; + res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda * (col + 1 < n)] * res_x[1]; + res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda * (col + 2 < n)] * res_x[2]; + res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda * (col + 3 < n)] * res_x[3]; + } + } + + sdata[tx + ty * DIM_X * 4] = res_A[0]; + sdata[tx + DIM_X + ty * DIM_X * 4] = res_A[1]; + sdata[tx + 2 * DIM_X + ty * DIM_X * 4] = res_A[2]; + sdata[tx + 3 * DIM_X + ty * DIM_X * 4] = res_A[3]; + + __syncthreads(); + + ind = hipBlockIdx_x * DIM_X * 4 + thread_id; + if (thread_id < DIM_X * 4) { + for (rocblas_int i = 1; i < DIM_Y; i++) + sdata[thread_id] += sdata[thread_id + DIM_X * 4 * i]; + + if (ind < m) + y[ind * incy] = alpha * sdata[thread_id] + beta * y[ind * incy]; + } +} + +template +__global__ void gemvc_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, + rocblas_int lda, const T* 
__restrict__ x, rocblas_int incx, U beta_device_host, T* y, + rocblas_int incy) +{ + auto alpha = load_scalar(alpha_device_host); + auto beta = load_scalar(beta_device_host); + rocblas_int tx = hipThreadIdx_x; + + if (tx < m) + A += tx; + + rocblas_int col = hipBlockIdx_x; + A += col * lda; + + T res(0); + + __shared__ T sdata[NB_X]; + + // partial sums + rocblas_int m_full = (m / NB_X) * NB_X; + + for (rocblas_int i = 0; i < m_full; i += NB_X) + res += rb_port_conj_op::eval(A[i]) * x[(tx + i) * incx]; + + if (tx + m_full < m) + res += rb_port_conj_op::eval(A[m_full]) * x[(tx + m_full) * incx]; + + sdata[tx] = res; + + // tree reduction of partial sums, + if (NB_X > 16) { + rocblas_sum_reduce(tx, sdata); + } else { + __syncthreads(); + + if (tx == 0) { + for (rocblas_int i = 1; i < m && i < NB_X; i++) + sdata[0] += sdata[i]; + } + + __syncthreads(); + } + + if (tx == 0) + y[col * incy] = alpha * sdata[0] + beta * y[col * incy]; +} + +template +constexpr char rocblas_gemv_name[] = "unknown"; +template <> +constexpr char rocblas_gemv_name[] = "rocblas_sgemv"; +template <> +constexpr char rocblas_gemv_name[] = "rocblas_dgemv"; + +/*! \brief BLAS Level 2 API + + \details + xGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or + y := alpha*A**T*x + beta*y, or + y := alpha*A**H*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + @param[in] + handle rocblas_handle. + handle to the rocblas library context queue. + @param[in] + trans rocblas_operation + @param[in] + m rocblas_int + @param[in] + n rocblas_int + @param[in] + alpha + specifies the scalar alpha. + @param[in] + A pointer storing matrix A on the GPU. + @param[in] + lda rocblas_int + specifies the leading dimension of A. + @param[in] + x pointer storing vector x on the GPU. + @param[in] + incx specifies the increment for the elements of x. + @param[in] + beta specifies the scalar beta. 
+ @param[out] + y pointer storing vector y on the GPU. + @param[in] + incy rocblas_int + specifies the increment for the elements of y. + + ********************************************************************/ + +template +rocblas_status rocblas_gemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const T* alpha, const T* A, rocblas_int lda, const T* x, rocblas_int incx, const T* beta, + T* y, rocblas_int incy) +{ + if (!handle) + return rocblas_status_invalid_handle; + if (!alpha || !beta) + return rocblas_status_invalid_pointer; + + if (!A || !x || !y) + return rocblas_status_invalid_pointer; + + if (m < 0 || n < 0 || lda < m || lda < 1 || !incx || !incy) + return rocblas_status_invalid_size; + + /* + * Quick return if possible. Not Argument error + */ + if (!m || !n) + return rocblas_status_success; + + hipStream_t rocblas_stream = handle->rocblas_stream; + + if (transA == rocblas_operation_none) { + // GEMVN_DIM_Y must be at least 4, 8 * 8 is very slow only 40Gflop/s + static constexpr int GEMVN_DIM_X = 32; + static constexpr int GEMVN_DIM_Y = 16; + rocblas_int blocks = (m - 1) / (GEMVN_DIM_X * 4) + 1; + + dim3 gemvn_grid(blocks); + dim3 gemvn_threads(GEMVN_DIM_X, GEMVN_DIM_Y); + + if (incx < 0) + x -= ssize_t(incx) * (n - 1); + if (incy < 0) + y -= ssize_t(incy) * (m - 1); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL((gemvn_kernel), gemvn_grid, gemvn_threads, 0, rocblas_stream, + m, n, alpha, A, lda, x, incx, beta, y, incy); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0.0) && rb_port_cmp_and_real_only(*beta, 1)) + return rocblas_status_success; + + hipLaunchKernelGGL((gemvn_kernel), gemvn_grid, gemvn_threads, 0, rocblas_stream, + m, n, *alpha, A, lda, x, incx, *beta, y, incy); + } + } else { + // transpose + // number of columns on the y-dim of the grid, using gemvc because gemvt(transpose) is a + // instance of gemvc (conjugate) + static constexpr int NB = 256; + dim3 
gemvc_grid(n); + dim3 gemvc_threads(NB); + + if (incx < 0) + x -= ssize_t(incx) * (m - 1); + if (incy < 0) + y -= ssize_t(incy) * (n - 1); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + if (transA == rocblas_operation_transpose) + hipLaunchKernelGGL(gemvc_kernel, gemvc_grid, gemvc_threads, 0, + rocblas_stream, m, n, alpha, A, lda, x, incx, beta, y, incy); + else + hipLaunchKernelGGL(gemvc_kernel, gemvc_grid, gemvc_threads, + 0, rocblas_stream, m, n, alpha, A, lda, x, incx, beta, y, incy); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0) && rb_port_cmp_and_real_only(*beta, 1)) + return rocblas_status_success; + + if (transA == rocblas_operation_transpose) + hipLaunchKernelGGL(gemvc_kernel, gemvc_grid, gemvc_threads, 0, + rocblas_stream, m, n, *alpha, A, lda, x, incx, *beta, y, incy); + else + hipLaunchKernelGGL(gemvc_kernel, gemvc_grid, gemvc_threads, + 0, rocblas_stream, m, n, *alpha, A, lda, x, incx, *beta, y, incy); + } + } + return rocblas_status_success; +} + +} // namespace + +/* + * =========================================================================== + * C wrapper + * =========================================================================== + */ + +extern "C" { + +rocblas_status rocblas_port_sgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, + const float* beta, float* y, rocblas_int incy) +{ + return rocblas_gemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +rocblas_status rocblas_port_dgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const double* alpha, const double* A, rocblas_int lda, const double* x, + rocblas_int incx, const double* beta, double* y, rocblas_int incy) +{ + return rocblas_gemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +rocblas_status rocblas_port_cgemv(rocblas_handle handle, rocblas_operation transA, 
rocblas_int m, rocblas_int n, + const hipFloatComplex* alpha, const hipFloatComplex* A, rocblas_int lda, + const hipFloatComplex* x, rocblas_int incx, const hipFloatComplex* beta, + hipFloatComplex* y, rocblas_int incy) +{ + return rocblas_gemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +rocblas_status rocblas_port_zgemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, + const hipDoubleComplex* alpha, const hipDoubleComplex* A, rocblas_int lda, + const hipDoubleComplex* x, rocblas_int incx, const hipDoubleComplex* beta, + hipDoubleComplex* y, rocblas_int incy) +{ + return rocblas_gemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +} // extern "C" diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_ger.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_ger.hip.cpp new file mode 100644 index 000000000..85d17de9f --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_ger.hip.cpp @@ -0,0 +1,108 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#include +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/status.h" +#include "rocblas_port/definitions.h" +#include "rocblas_port/handle.h" +#include "rocblas_port/utility.h" +#include "rocblas_port/reduction.h" +#include "rocblas_port/port_helper_func.h" + +namespace { + +template +__global__ void rocblas_ger_kernel(const rocblas_int rows_A, const rocblas_int cols_A, const T* __restrict__ x, + const rocblas_int incx, const T* __restrict__ y, rocblas_int incy, + U alpha_device_host, T* A, const rocblas_int lda) +{ + auto alpha = load_scalar(alpha_device_host); + + auto row = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + auto col = hipBlockIdx_y; + + if (row < rows_A) + A[row + col * lda] += alpha * x[row * incx] * y[col * incy]; +} + +template +rocblas_status rocblas_ger(rocblas_handle handle, rocblas_int m, rocblas_int n, const T* alpha, const T* x, rocblas_int incx, + const T* y, rocblas_int incy, T* A, rocblas_int lda) +{ + if (!handle) + return rocblas_status_invalid_handle; + if (!alpha) + return rocblas_status_invalid_pointer; + if (!n || !m) + return rocblas_status_success; + if (!A || !x || !y) + return rocblas_status_invalid_pointer; + + dim3 threads(256); + dim3 grid(m / 256 + (m % 256 != 0), n); + hipStream_t rocblas_stream = handle->rocblas_stream; + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL((rocblas_ger_kernel), grid, threads, 0, rocblas_stream, + m, n, x, incx, y, incy, alpha, A, lda); + } else { + hipLaunchKernelGGL((rocblas_ger_kernel), grid, threads, 0, rocblas_stream, + m, n, x, incx, y, incy, *alpha, A, lda); + } + return 
rocblas_status_success; +} + +} // namespace + +/* + * =========================================================================== + * C wrapper + * =========================================================================== + */ + +extern "C" { + +rocblas_status rocblas_port_sger(rocblas_handle handle, rocblas_int m, rocblas_int n, const float* alpha, + const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, + rocblas_int lda) +{ + return rocblas_ger(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +rocblas_status rocblas_port_dger(rocblas_handle handle, rocblas_int m, rocblas_int n, const double* alpha, + const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, + rocblas_int lda) +{ + return rocblas_ger(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +rocblas_status rocblas_port_cgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* x, rocblas_int incx, const hipFloatComplex* y, + rocblas_int incy, hipFloatComplex* A, rocblas_int lda) +{ + return rocblas_ger(handle, m, n, alpha, x, incx, y, incy, A, lda); +} +rocblas_status rocblas_port_zgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* x, rocblas_int incx, const hipDoubleComplex* y, + rocblas_int incy, hipDoubleComplex* A, rocblas_int lda) +{ + return rocblas_ger(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +} // extern "C" diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp new file mode 100644 index 000000000..a90c95d75 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp @@ -0,0 +1,744 @@ +/* ************************************************************************ + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#include +#include +#include +#include +#include "rocblas_port/rocblas-types.h" +#include "rocblas_port/status.h" +#include "rocblas_port/definitions.h" +#include "rocblas_port/handle.h" +#include "rocblas_port/utility.h" +#include "rocblas_port/reduction.h" +#include "rocblas_port/port_helper_func.h" + +namespace { + + +template +struct CreateReal { + template + __device__ __host__ static inline T eval(const U& val) { + return T(val); + } +}; + +template<> +struct CreateReal { + template + __device__ __host__ static inline hipFloatComplex eval(const U& val) { + return hipFloatComplex((float)val, 0.f); + } +}; + +template<> +struct CreateReal { + template + __device__ __host__ static inline hipDoubleComplex eval(const U& val) { + return hipDoubleComplex((double)val, 0.); + } +}; + + +template +struct MatrixLoad; + + +/* + * FULL Matrix + */ +template +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + return M[row * inc + col * ld]; + } +}; + +//transposed +template +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + return M[col * inc + row * ld]; + } +}; + +/* + * Lower Tri Matrix + */ +// non-unit diag +template<> +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col > row) return CreateReal::eval(0); + return M[row * inc + col * ld]; + } +}; + +// transposed non-unit diag +template +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (row > col) return CreateReal::eval(0); + 
return M[col * inc + row * ld]; + } +}; + +// unit diag +template<> +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col == row) return CreateReal::eval(1); + if (col > row) return CreateReal::eval(0); + return M[row * inc + col * ld]; + } +}; + +// transposed unit diag +template +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col == row) return CreateReal::eval(1); + if (row > col) return CreateReal::eval(0); + return M[col * inc + row * ld]; + } +}; + +/* + * Upper Tri Matrix + */ +// non-unit diag +template<> +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col < row) return CreateReal::eval(0); + return M[row * inc + col * ld]; + } +}; +// transposed non-unit diag +template +struct MatrixLoad { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (row < col) return CreateReal::eval(0); + return M[col * inc + row * ld]; + } +}; + +// unit diag +template <> +struct MatrixLoad +{ + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col == row) + return CreateReal::eval(1); + if (col < row) + return CreateReal::eval(0); + return M[row * inc + col * ld]; + } +}; + +// transposed unit diag +template +struct MatrixLoad +{ + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col) + { + if (col == row) + return CreateReal::eval(1); + if (row < col) + return CreateReal::eval(0); + 
return M[col * inc + row * ld]; + } +}; + +/* + * A*B and A*B^t and A*B^H + */ +template +__global__ void trmmn_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, + rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + auto alpha = load_scalar(alpha_device_host); + rocblas_int num_threads = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; + + if (DIM_X * DIM_Y != num_threads) + return; // need to launch exactly the same number of threads as template parameters indicate + + rocblas_int thread_id = hipThreadIdx_x + hipThreadIdx_y * hipBlockDim_x; + + // threads are all configurated locally + rocblas_int tx = thread_id % DIM_X; + rocblas_int ty = thread_id / DIM_X; + + rocblas_int ind; + + __shared__ T sdata[DIM_X * 4 * DIM_Y]; + + T res_A[4]; // micor tile is 4 * 4 + T res_B[4]; + + res_A[0] = res_B[0] = T(0.0); + res_A[1] = res_B[0] = T(0.0); + res_A[2] = res_B[0] = T(0.0); + res_A[3] = res_B[0] = T(0.0); + + ind = hipBlockIdx_x * DIM_X * 4 + tx; + + rocblas_int n_tail = n % (4 * DIM_Y); + rocblas_int col = ty * 4; + rocblas_int col_B = hipBlockIdx_y; + + // B += col_B * ldb; + + for (col = ty * 4; col < (n - n_tail); col += 4 * DIM_Y) { + res_B[0] = MatrixLoad::eval(B, ldb, 1, col + 0, col_B); + res_B[1] = MatrixLoad::eval(B, ldb, 1, col + 1, col_B); + res_B[2] = MatrixLoad::eval(B, ldb, 1, col + 2, col_B); + res_B[3] = MatrixLoad::eval(B, ldb, 1, col + 3, col_B); + + res_B[0] = rb_port_conj_op::eval(res_B[0]); + res_B[1] = rb_port_conj_op::eval(res_B[1]); + res_B[2] = rb_port_conj_op::eval(res_B[2]); + res_B[3] = rb_port_conj_op::eval(res_B[3]); + + if (ind < m) { + res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 0)) * res_B[0]; + res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 1)) * res_B[1]; + res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 2)) * res_B[2]; + res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 3)) * res_B[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += + 
MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 0)) * res_B[0]; + res_A[1] += + MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 1)) * res_B[1]; + res_A[1] += + MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 2)) * res_B[2]; + res_A[1] += + MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 3)) * res_B[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += + MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 0)) * + res_B[0]; + res_A[2] += + MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 1)) * + res_B[1]; + res_A[2] += + MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 2)) * + res_B[2]; + res_A[2] += + MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 3)) * + res_B[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += + MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 0)) * + res_B[0]; + res_A[3] += + MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 1)) * + res_B[1]; + res_A[3] += + MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 2)) * + res_B[2]; + res_A[3] += + MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 3)) * + res_B[3]; + } + } + + // if n is not multiple of (DIM_Y * 4) + if (n_tail > 0) { + res_B[0] = T(0); + res_B[1] = T(0); + res_B[2] = T(0); + res_B[3] = T(0); + + if (col + 0 < n) + res_B[0] = MatrixLoad::eval(B, ldb, 1, col + 0, col_B); + if (col + 1 < n) + res_B[1] = MatrixLoad::eval(B, ldb, 1, col + 1, col_B); + if (col + 2 < n) + res_B[2] = MatrixLoad::eval(B, ldb, 1, col + 2, col_B); + if (col + 3 < n) + res_B[3] = MatrixLoad::eval(B, ldb, 1, col + 3, col_B); + + res_B[0] = rb_port_conj_op::eval(res_B[0]); + res_B[1] = rb_port_conj_op::eval(res_B[1]); + res_B[2] = rb_port_conj_op::eval(res_B[2]); + res_B[3] = rb_port_conj_op::eval(res_B[3]); + + if (ind < m) { + res_A[0] += + MatrixLoad::eval(A, lda, 1, ind, (col + 0) * (col + 0 < n)) * + res_B[0]; + res_A[0] += + MatrixLoad::eval(A, lda, 1, ind, (col + 1) * (col + 1 < n)) * + res_B[1]; + res_A[0] += + MatrixLoad::eval(A, lda, 1, ind, (col + 2) * (col + 2 < n)) * + res_B[2]; + 
res_A[0] += + MatrixLoad::eval(A, lda, 1, ind, (col + 3) * (col + 3 < n)) * + res_B[3]; + } + + if (ind + DIM_X < m) { + res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, + (col + 0) * (col + 0 < n)) * + res_B[0]; + res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, + (col + 1) * (col + 1 < n)) * + res_B[1]; + res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, + (col + 2) * (col + 2 < n)) * + res_B[2]; + res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, + (col + 3) * (col + 3 < n)) * + res_B[3]; + } + + if (ind + 2 * DIM_X < m) { + res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, + (col + 0) * (col + 0 < n)) * + res_B[0]; + res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, + (col + 1) * (col + 1 < n)) * + res_B[1]; + res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, + (col + 2) * (col + 2 < n)) * + res_B[2]; + res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, + (col + 3) * (col + 3 < n)) * + res_B[3]; + } + + if (ind + 3 * DIM_X < m) { + res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, + (col + 0) * (col + 0 < n)) * + res_B[0]; + res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, + (col + 1) * (col + 1 < n)) * + res_B[1]; + res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, + (col + 2) * (col + 2 < n)) * + res_B[2]; + res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, + (col + 3) * (col + 3 < n)) * + res_B[3]; + } + } + + sdata[tx + ty * DIM_X * 4] = res_A[0]; + sdata[tx + DIM_X + ty * DIM_X * 4] = res_A[1]; + sdata[tx + 2 * DIM_X + ty * DIM_X * 4] = res_A[2]; + sdata[tx + 3 * DIM_X + ty * DIM_X * 4] = res_A[3]; + + __syncthreads(); + + ind = hipBlockIdx_x * DIM_X * 4 + thread_id; + if (thread_id < DIM_X * 4) { + for (rocblas_int i = 1; i < DIM_Y; i++) + sdata[thread_id] += sdata[thread_id + DIM_X * 4 * i]; + + if (ind < m) + C[ind + col_B * ldc] = alpha * sdata[thread_id]; + } +} + +template +__global__ void trmmc_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, + 
rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + + auto alpha = load_scalar(alpha_device_host); + rocblas_int tx = hipThreadIdx_x; + + // if (tx < m) + // A += tx; + rocblas_int tx_load_id = 0; + if (tx < m) + tx_load_id = tx; + + rocblas_int col_A = hipBlockIdx_x; + rocblas_int col_B = hipBlockIdx_y; + // A += col_A * lda; + // B += col_B * ldb; + + T res(0); + + __shared__ T sdata[NB_X]; + + // partial sums + rocblas_int m_full = (m / NB_X) * NB_X; + + for (rocblas_int i = 0; i < m_full; i += NB_X) + res += + rb_port_conj_op::eval(MatrixLoad::eval(A, lda, 1, i + tx_load_id, col_A)) * + rb_port_conj_op::eval(MatrixLoad::eval(B, ldb, 1, (tx + i), col_B)); + + if (tx + m_full < m) + res += rb_port_conj_op::eval( + MatrixLoad::eval(A, lda, 1, m_full + tx_load_id, col_A)) * + rb_port_conj_op::eval(MatrixLoad::eval(B, ldb, 1, (tx + m_full), col_B)); + + sdata[tx] = res; + + // tree reduction of partial sums, + if (NB_X > 16) { + rocblas_sum_reduce(tx, sdata); + } else { + __syncthreads(); + + if (tx == 0) { + for (rocblas_int i = 1; i < m && i < NB_X; i++) + sdata[0] += sdata[i]; + } + + __syncthreads(); + } + + if (tx == 0) + C[col_A + col_B * ldc] = alpha * sdata[0]; +} + +/*! \brief BLAS Level 2 API + + \details + xGEMM performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or + y := alpha*A**T*x + beta*y, or + y := alpha*A**H*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + @param[in] + handle rocblas_handle. + handle to the rocblas library context queue. + @param[in] + trans rocblas_operation + @param[in] + m rocblas_int + @param[in] + n rocblas_int + @param[in] + alpha + specifies the scalar alpha. + @param[in] + A pointer storing matrix A on the GPU. + @param[in] + lda rocblas_int + specifies the leading dimension of A. + @param[in] + x pointer storing vector x on the GPU. + @param[in] + incx specifies the increment for the elements of x. 
+ @param[in] + beta specifies the scalar beta. + @param[out] + y pointer storing vector y on the GPU. + @param[in] + incy rocblas_int + specifies the increment for the elements of y. + + ********************************************************************/ + +template +rocblas_status rocblas_trmm(rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, + rocblas_int cols_B, const T* alpha, const T* A, rocblas_int lda, const T* B, + rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (!handle) + return rocblas_status_invalid_handle; + if (!alpha) + return rocblas_status_invalid_pointer; + + if (!A || !B || !C) + return rocblas_status_invalid_pointer; + + if (rows_A < 0 || rows_B < 0 || cols_B < 0 || lda < rows_A || lda < 1 || ldb < rows_B || ldb < 1 || ldc < rows_A || ldc < 1) + return rocblas_status_invalid_size; + + /* + * Quick return if possible. Not Argument error + */ + if (!rows_A || !rows_B || !cols_B) + return rocblas_status_success; + + hipStream_t rocblas_stream = handle->rocblas_stream; + + if (transa == rocblas_operation_none) { + // GEMMN_DIM_Y must be at least 4, 8 * 8 is very slow only 40Gflop/s + static constexpr int GEMMN_DIM_X = 32; + static constexpr int GEMMN_DIM_Y = 16; + rocblas_int blocks = (rows_A - 1) / (GEMMN_DIM_X * 4) + 1; + + dim3 trmmn_grid(blocks, cols_B); + dim3 trmmn_threads(GEMMN_DIM_X, GEMMN_DIM_Y); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL((trmmn_kernel), + trmmn_grid, trmmn_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0.0)) + return rocblas_status_success; + + hipLaunchKernelGGL((trmmn_kernel), + trmmn_grid, trmmn_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); + } + } else { + // transpose + // number of columns on the y-dim of the grid, using trmmc because trmmt(transpose) is a + // instance of trmmc (conjugate) + static constexpr int NB = 256; + 
dim3 trmmc_grid(rows_B, cols_B); + dim3 trmmc_threads(NB); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + if (transa == rocblas_operation_transpose) + hipLaunchKernelGGL(trmmc_kernel, + trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); + else + hipLaunchKernelGGL( + trmmc_kernel, + trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); + } else { + if (rb_port_cmp_and_real_only(*alpha, 0)) + return rocblas_status_success; + + if (transa == rocblas_operation_transpose) + hipLaunchKernelGGL(trmmc_kernel, + trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); + else + hipLaunchKernelGGL( + trmmc_kernel, + trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); + } + } + return rocblas_status_success; +} + +/* + * =========================================================================== + * Helper functions to select template parameters + * =========================================================================== + */ +template +rocblas_status rocblas_trmm_select_op_b( + std::tuple templ_param, + rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, + const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (std::get<4>(templ_param) == rocblas_operation_none) { + return rocblas_trmm( + handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } else if (std::get<4>(templ_param) == rocblas_operation_transpose) { + return rocblas_trmm( + handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } else { + return rocblas_trmm( + handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } +} + +template +rocblas_status rocblas_trmm_select_diag2( + std::tuple templ_param, + rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, 
rocblas_int rows_B, rocblas_int cols_B, + const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (std::get<3>(templ_param) == rocblas_diagonal_unit) { + return rocblas_trmm_select_op_b( + templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } else { + return rocblas_trmm_select_op_b( + templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } +} + +template +rocblas_status rocblas_trmm_select_diag1( + std::tuple templ_param, + rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, + const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (std::get<2>(templ_param) == rocblas_diagonal_unit) { + return rocblas_trmm_select_diag2( + templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } else { + return rocblas_trmm_select_diag2( + templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); + } +} + +template +rocblas_status rocblas_trmm_select_fill2( + std::tuple templ_param, + rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, + const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (std::get<1>(templ_param) == rocblas_fill_lower) { + return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, + cols_B, alpha, A, lda, B, ldb, C, ldc); + } else if (std::get<1>(templ_param) == rocblas_fill_upper) { + return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, + cols_B, alpha, A, lda, B, ldb, C, ldc); + } else { + return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, cols_B, + alpha, A, lda, B, ldb, C, ldc); + } +} + +template +rocblas_status rocblas_trmm_select_fill1( + std::tuple templ_param, + rocblas_handle handle, 
rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, + const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + if (std::get<0>(templ_param) == rocblas_fill_lower) { + return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, + A, lda, B, ldb, C, ldc); + } else if (std::get<0>(templ_param) == rocblas_fill_upper) { + return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, + A, lda, B, ldb, C, ldc); + } else { + return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, + A, lda, B, ldb, C, ldc); + } +} + +template +rocblas_status rocblas_trmm_select(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const T* alpha, const T* A, + rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +{ + rocblas_operation transA, transB; + rocblas_fill uploA, uploB; + rocblas_diagonal diagA, diagB; + rocblas_int rows_A, rows_B, cols_B; + + // create parameters according to multiplication order + if (side == rocblas_side_right) { + rows_A = m; + rows_B = n; + cols_B = n; + std::swap(A, B); + std::swap(lda, ldb); + transA = rocblas_operation_none; + transB = trans; + uploA = rocblas_fill_full; + uploB = uplo; + diagA = rocblas_diagonal_non_unit; + diagB = diag; + } else { + rows_A = m; + rows_B = m; + cols_B = n; + transB = rocblas_operation_none; + transA = trans; + uploB= rocblas_fill_full; + uploA = uplo; + diagB = rocblas_diagonal_non_unit; + diagA = diag; + + } + + auto templ_param = std::make_tuple(uploA, uploB, diagA, diagB, transB); + + return rocblas_trmm_select_fill1(templ_param, handle, transA, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); +} + +} // namespace + +/* + * =========================================================================== + * C wrapper + * 
=========================================================================== + */ + +extern "C" { + +rocblas_status rocblas_port_strmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, + const float* A, rocblas_int lda, const float* B, rocblas_int ldb, float* C, + rocblas_int ldc) +{ + return rocblas_trmm_select(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +} + +rocblas_status rocblas_port_dtrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, + const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, + rocblas_int ldc) +{ + return rocblas_trmm_select(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +} + +rocblas_status rocblas_port_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const hipFloatComplex* alpha, + const hipFloatComplex* A, rocblas_int lda, const hipFloatComplex* B, rocblas_int ldb, + hipFloatComplex* C, rocblas_int ldc) +{ + return rocblas_trmm_select(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +} + +rocblas_status rocblas_port_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation trans, + rocblas_diagonal diag, rocblas_int m, rocblas_int n, const hipDoubleComplex* alpha, + const hipDoubleComplex* A, rocblas_int lda, const hipDoubleComplex* B, + rocblas_int ldb, hipDoubleComplex* C, rocblas_int ldc) +{ + return rocblas_trmm_select(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +} + +} // extern "C" From d1f4eeb7f66cd5c3912d8d1b6c214fd347b19e17 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Mon, 4 Mar 2019 08:42:53 +0100 Subject: [PATCH 13/28] fixed test hloc --- 
apps/tests/test_hloc.cpp | 3 --- src/Potential/generate_d_operator_matrix.hpp | 2 +- src/simulation_parameters.hpp | 10 ++++++++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/apps/tests/test_hloc.cpp b/apps/tests/test_hloc.cpp index 034f79db0..2ce4a10ad 100644 --- a/apps/tests/test_hloc.cpp +++ b/apps/tests/test_hloc.cpp @@ -79,9 +79,6 @@ void test_hloc(std::vector mpi_grid_dims__, double cutoff__, int num_bands_ t1.stop(); hloc.dismiss(); - if (pu == GPU && !phi.pw_coeffs(0).is_remapped()) { - hphi.pw_coeffs(0).copy_to(memory_t::host, 0, 4 * num_bands__); - } double diff{0}; for (int i = 0; i < 4 * num_bands__; i++) { diff --git a/src/Potential/generate_d_operator_matrix.hpp b/src/Potential/generate_d_operator_matrix.hpp index 6b9c4da4f..685edb35f 100644 --- a/src/Potential/generate_d_operator_matrix.hpp +++ b/src/Potential/generate_d_operator_matrix.hpp @@ -86,7 +86,7 @@ inline void Potential::generate_D_operator_matrix() d_tmp.zero(); if (ctx_.processing_unit() == device_t::GPU) { - la = linalg_t::cublas; + la = linalg_t::gpublas; mem = memory_t::device; d_tmp.zero(memory_t::device); veff_a.allocate(ctx_.mem_pool(memory_t::device)); diff --git a/src/simulation_parameters.hpp b/src/simulation_parameters.hpp index aca66fef1..f627eda1e 100644 --- a/src/simulation_parameters.hpp +++ b/src/simulation_parameters.hpp @@ -273,8 +273,18 @@ class Simulation_parameters { if (acc::num_devices() == 0) { processing_unit_ = device_t::CPU; + control_input_.processing_unit_ = "cpu"; } else { processing_unit_ = pu__; + if (pu__ == device_t::CPU) { + control_input_.processing_unit_ = "cpu"; + } else if (pu__ == device_t::GPU) { + control_input_.processing_unit_ = "gpu"; + } else { + std::stringstream s; + s << "wrong processing unit type"; + TERMINATE(s); + } } } From 6ac1a8c83f11eb0d384e2eba7bfa2dbcafda6940 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Tue, 5 Mar 2019 14:50:26 +0100 Subject: [PATCH 14/28] fixed MPI include directory in cmake --- 
CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb4a7b7c1..a16b0af5e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,7 +168,7 @@ include_directories(${FFTW_INCLUDE_DIR}) include_directories(${LIBXC_INCLUDE_DIR}) include_directories(${LIBSPG_INCLUDE_DIR}) include_directories(${HDF5_INCLUDE_DIR}) -include_directories(${MPI_INCLUDE_DIR}) +include_directories(${MPI_CXX_INCLUDE_DIRS}) if(USE_VDWXC) find_package(LibVDWXC 0.3.0 REQUIRED) From 1bd5254ef6d5c8d59f540c9c63a1a45f8d32524b Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Tue, 5 Mar 2019 14:50:44 +0100 Subject: [PATCH 15/28] added json output to test_hloc --- apps/tests/test_hloc.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/apps/tests/test_hloc.cpp b/apps/tests/test_hloc.cpp index 2ce4a10ad..f98d8b785 100644 --- a/apps/tests/test_hloc.cpp +++ b/apps/tests/test_hloc.cpp @@ -1,4 +1,6 @@ #include +#include +#include using namespace sirius; @@ -113,6 +115,7 @@ int main(int argn, char** argv) args.register_key("--use_gpu=", "{int} 0: CPU only, 1: hybrid CPU+GPU"); args.register_key("--gpu_ptr=", "{int} 0: start from CPU, 1: start from GPU"); args.register_key("--repeat=", "{int} number of repetitions"); + args.register_key("--t_file=", "{string} name of timing output file"); args.parse_args(argn, argv); if (args.exist("help")) { @@ -127,6 +130,7 @@ int main(int argn, char** argv) auto use_gpu = args.value("use_gpu", 0); auto gpu_ptr = args.value("gpu_ptr", 0); auto repeat = args.value("repeat", 3); + auto t_file = args.value("t_file", std::string("")); sirius::initialize(1); for (int i = 0; i < repeat; i++) { @@ -135,6 +139,11 @@ int main(int argn, char** argv) Communicator::world().barrier(); if (Communicator::world().rank() == 0) { utils::timer::print(); + + if (!t_file.empty()) { + std::ofstream json_file(t_file); + json_file << std::setw(2) << utils::timer::serialize() << std::endl; + } } 
Communicator::world().barrier(); sirius::finalize(); From 88e4d04baca5bedf9a90180478a8b27dd4c7d880 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Fri, 8 Mar 2019 09:39:34 +0100 Subject: [PATCH 16/28] fixed missing kernel declaration for ROCm --- src/Beta_projectors/beta_projectors_base.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Beta_projectors/beta_projectors_base.hpp b/src/Beta_projectors/beta_projectors_base.hpp index 62b00bd9f..33b45605f 100644 --- a/src/Beta_projectors/beta_projectors_base.hpp +++ b/src/Beta_projectors/beta_projectors_base.hpp @@ -28,7 +28,7 @@ namespace sirius { -#if defined(__GPU) && defined(__CUDA) +#if defined(__GPU) extern "C" void create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, @@ -322,7 +322,7 @@ class Beta_projectors_base break; } case device_t::GPU: { -#if defined(__GPU) && defined(__CUDA) +#if defined(__GPU) auto& desc = chunk(ichunk__).desc_; create_beta_gk_gpu(chunk(ichunk__).num_atoms_, num_gkvec_loc(), @@ -331,8 +331,6 @@ class Beta_projectors_base gkvec_coord_.at(memory_t::device), chunk(ichunk__).atom_pos_.at(memory_t::device), pw_coeffs_a().at(memory_t::device)); -#else - throw std::runtime_error("create_beta_gk_gpu() not implemented for non-CUDA devices!"); #endif /* wave-functions are on CPU but the beta-projectors are on GPU */ if (gkvec_.comm().rank() == 0 && is_host_memory(ctx_.preferred_memory_t())) { From ce67ea0ce11656174e388670f235c0e8a4a2751b Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Fri, 8 Mar 2019 17:00:32 +0100 Subject: [PATCH 17/28] updated hipblas_port library --- src/SDDK/GPU/hipblas_port/CMakeLists.txt | 39 +- .../rocblas_port/port_helper_func.h | 6 +- .../hipblas_port/rocblas_port_gemm.hip.cpp | 447 +++----- .../hipblas_port/rocblas_port_gemv.hip.cpp | 53 +- .../hipblas_port/rocblas_port_trmm.hip.cpp | 588 +++-------- src/SDDK/GPU/hipblas_port/tests/axpy_test.cpp | 205 ++++ src/SDDK/GPU/hipblas_port/tests/gemm_test.cpp | 769 ++++++++++++++ 
src/SDDK/GPU/hipblas_port/tests/gemv_test.cpp | 341 +++++++ src/SDDK/GPU/hipblas_port/tests/ger_test.cpp | 221 ++++ src/SDDK/GPU/hipblas_port/tests/main.cpp | 8 + src/SDDK/GPU/hipblas_port/tests/trmm_test.cpp | 951 ++++++++++++++++++ 11 files changed, 2820 insertions(+), 808 deletions(-) create mode 100644 src/SDDK/GPU/hipblas_port/tests/axpy_test.cpp create mode 100644 src/SDDK/GPU/hipblas_port/tests/gemm_test.cpp create mode 100644 src/SDDK/GPU/hipblas_port/tests/gemv_test.cpp create mode 100644 src/SDDK/GPU/hipblas_port/tests/ger_test.cpp create mode 100644 src/SDDK/GPU/hipblas_port/tests/main.cpp create mode 100644 src/SDDK/GPU/hipblas_port/tests/trmm_test.cpp diff --git a/src/SDDK/GPU/hipblas_port/CMakeLists.txt b/src/SDDK/GPU/hipblas_port/CMakeLists.txt index 6c279732d..27a5af5be 100644 --- a/src/SDDK/GPU/hipblas_port/CMakeLists.txt +++ b/src/SDDK/GPU/hipblas_port/CMakeLists.txt @@ -1,22 +1,33 @@ -if (BUILD_HIPBLAS_TESTS) - add_subdirectory(ext/googletest) - add_executable(test_hipblas tests/main.cpp tests/gemv_test.cpp tests/gemm_test.cpp tests/trmm_test.cpp tests/ger_test.cpp tests/axpy_test.cpp) - target_link_libraries(run_tests ${ROCM_LIBRARIES} hipblas_port gtest_main) +if(NOT USE_ROCM) + message(FATAL_ERROR "CMake file must not be included without ROCm enabled!") endif() - -# set(DEFINITIONS_PROP "$") -# set(DEFINITIONS_GENERATOR "$<$:-D$>") -# set(INCLUDE_DIR_PROP "$") -# set(INCLUDE_DIR_GENERATOR "$<$:-I$>") - -rocm_hip_add_library(hipblas_port SHARED rocblas_port_gemv.hip.cpp hipblas_port.hip.cpp rocblas_port_gemm.hip.cpp rocblas_port_trmm.hip.cpp rocblas_port_ger.hip.cpp rocblas_port_axpy.hip.cpp +rocm_hip_add_library(hipblas_port SHARED + rocblas_port_gemv.hip.cpp hipblas_port.hip.cpp rocblas_port_gemm.hip.cpp + rocblas_port_trmm.hip.cpp rocblas_port_ger.hip.cpp rocblas_port_axpy.hip.cpp FLAGS "-Wno-macro-redefined -std=c++14" INCLUDE_DIRS ${ROCM_INCLUDE_DIRS}) +option(BUILD_HIPBLAS_TESTS "Build tests for custom implementation of blas functions in 
ROCm" OFF) if (BUILD_HIPBLAS_TESTS) - add_subdirectory(ext/googletest) - add_executable(test_hipblas tests/main.cpp tests/gemv_test.cpp tests/gemm_test.cpp tests/trmm_test.cpp tests/ger_test.cpp tests/axpy_test.cpp) - target_link_libraries(run_tests ${ROCM_LIBRARIES} hipblas_port gtest_main) + # download google test + set(BUILD_GMOCK OFF) + set(INSTALL_GTEST OFF) + include(FetchContent) # requires CMake 3.11 + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.8.1 + ) + FetchContent_GetProperties(googletest) + if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) + endif() + + + add_executable(test_hipblas_port tests/main.cpp tests/gemv_test.cpp tests/gemm_test.cpp tests/trmm_test.cpp tests/ger_test.cpp tests/axpy_test.cpp) + target_link_libraries(test_hipblas_port ${ROCM_LIBRARIES} hipblas_port gtest_main) + target_include_directories(test_hipblas_port PRIVATE ${CMAKE_CURRENT_LIST_DIR}) endif() diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h b/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h index 4d76ffa0c..289968f1f 100644 --- a/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h +++ b/src/SDDK/GPU/hipblas_port/rocblas_port/port_helper_func.h @@ -22,19 +22,19 @@ __host__ __device__ inline bool rb_port_cmp_and_real_only(const hipFloatComplex& * Conjugate helper functions */ template -struct rb_port_conj_op { +struct ConjOp { __host__ __device__ static inline T eval(const T& val) { return val; } }; template<> -struct rb_port_conj_op { +struct ConjOp { __host__ __device__ static inline hipDoubleComplex eval(const hipDoubleComplex& val) { return hipDoubleComplex(val.x, -val.y); } }; template<> -struct rb_port_conj_op { +struct ConjOp { __host__ __device__ static inline hipFloatComplex eval(const hipFloatComplex& val) { return hipFloatComplex(val.x, -val.y); } diff --git 
a/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp index 6c42ff859..b3b7092ad 100644 --- a/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_gemm.hip.cpp @@ -17,6 +17,8 @@ #include #include +#include +#include #include "rocblas_port/rocblas-types.h" #include "rocblas_port/status.h" #include "rocblas_port/definitions.h" @@ -27,308 +29,195 @@ namespace { -template -__global__ void gemmn_kernel(rocblas_int m, rocblas_int n, rocblas_int k, U alpha_device_host, const T* __restrict__ A, - rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, U beta_device_host, T* C, - rocblas_int ldc) -{ - auto alpha = load_scalar(alpha_device_host); - auto beta = load_scalar(beta_device_host); - rocblas_int num_threads = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; - - if (DIM_X * DIM_Y != num_threads) - return; // need to launch exactly the same number of threads as template parameters indicate - - rocblas_int thread_id = hipThreadIdx_x + hipThreadIdx_y * hipBlockDim_x; - - // threads are all configurated locally - rocblas_int tx = thread_id % DIM_X; - rocblas_int ty = thread_id / DIM_X; - - rocblas_int ind; - - __shared__ T sdata[DIM_X * 4 * DIM_Y]; - - T res_A[4]; // micor tile is 4 * 4 - T res_B[4]; - - res_A[0] = res_B[0] = T(0.0); - res_A[1] = res_B[0] = T(0.0); - res_A[2] = res_B[0] = T(0.0); - res_A[3] = res_B[0] = T(0.0); - - ind = hipBlockIdx_x * DIM_X * 4 + tx; - - rocblas_int k_tail = k % (4 * DIM_Y); - rocblas_int col = ty * 4; - rocblas_int col_B = hipBlockIdx_y; - - B += col_B * MatrixDim::ld(ldb, 1); - - for (col = ty * 4; col < (k - k_tail); col += 4 * DIM_Y) { - res_B[0] = rb_port_conj_op::eval(B[(col + 0) * MatrixDim::inc(ldb, 1)]); - res_B[1] = rb_port_conj_op::eval(B[(col + 1) * MatrixDim::inc(ldb, 1)]); - res_B[2] = rb_port_conj_op::eval(B[(col + 2) * MatrixDim::inc(ldb, 1)]); - res_B[3] = rb_port_conj_op::eval(B[(col + 3) * MatrixDim::inc(ldb, 1)]); - - 
if (ind < m) { - res_A[0] += A[ind + (col + 0) * lda] * res_B[0]; - res_A[0] += A[ind + (col + 1) * lda] * res_B[1]; - res_A[0] += A[ind + (col + 2) * lda] * res_B[2]; - res_A[0] += A[ind + (col + 3) * lda] * res_B[3]; - } - - if (ind + DIM_X < m) { - res_A[1] += A[ind + DIM_X + (col + 0) * lda] * res_B[0]; - res_A[1] += A[ind + DIM_X + (col + 1) * lda] * res_B[1]; - res_A[1] += A[ind + DIM_X + (col + 2) * lda] * res_B[2]; - res_A[1] += A[ind + DIM_X + (col + 3) * lda] * res_B[3]; - } - - if (ind + 2 * DIM_X < m) { - res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda] * res_B[0]; - res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda] * res_B[1]; - res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda] * res_B[2]; - res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda] * res_B[3]; - } - - if (ind + 3 * DIM_X < m) { - res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda] * res_B[0]; - res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda] * res_B[1]; - res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda] * res_B[2]; - res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda] * res_B[3]; - } + +template +struct CreateReal { + template + __device__ __host__ static inline T eval(const U& val) { + return T(val); } +}; - // if n is not multiple of (DIM_Y * 4) - if (k_tail > 0) { - res_B[0] = - (col + 0 < k) ? rb_port_conj_op::eval(B[(col + 0) * MatrixDim::inc(ldb, 1)]) : T(0); - res_B[1] = - (col + 1 < k) ? rb_port_conj_op::eval(B[(col + 1) * MatrixDim::inc(ldb, 1)]) : T(0); - res_B[2] = - (col + 2 < k) ? rb_port_conj_op::eval(B[(col + 2) * MatrixDim::inc(ldb, 1)]) : T(0); - res_B[3] = - (col + 3 < k) ? 
rb_port_conj_op::eval(B[(col + 3) * MatrixDim::inc(ldb, 1)]) : T(0); - - if (ind < m) { - res_A[0] += A[ind + (col + 0) * lda * (col + 0 < k)] * res_B[0]; - res_A[0] += A[ind + (col + 1) * lda * (col + 1 < k)] * res_B[1]; - res_A[0] += A[ind + (col + 2) * lda * (col + 2 < k)] * res_B[2]; - res_A[0] += A[ind + (col + 3) * lda * (col + 3 < k)] * res_B[3]; - } - - if (ind + DIM_X < m) { - res_A[1] += A[ind + DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; - res_A[1] += A[ind + DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; - res_A[1] += A[ind + DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; - res_A[1] += A[ind + DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; - } - - if (ind + 2 * DIM_X < m) { - res_A[2] += A[ind + 2 * DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; - res_A[2] += A[ind + 2 * DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; - res_A[2] += A[ind + 2 * DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; - res_A[2] += A[ind + 2 * DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; - } - - if (ind + 3 * DIM_X < m) { - res_A[3] += A[ind + 3 * DIM_X + (col + 0) * lda * (col + 0 < k)] * res_B[0]; - res_A[3] += A[ind + 3 * DIM_X + (col + 1) * lda * (col + 1 < k)] * res_B[1]; - res_A[3] += A[ind + 3 * DIM_X + (col + 2) * lda * (col + 2 < k)] * res_B[2]; - res_A[3] += A[ind + 3 * DIM_X + (col + 3) * lda * (col + 3 < k)] * res_B[3]; - } +template<> +struct CreateReal { + template + __device__ __host__ static inline hipFloatComplex eval(const U& val) { + return hipFloatComplex((float)val, 0.f); } +}; - sdata[tx + ty * DIM_X * 4] = res_A[0]; - sdata[tx + DIM_X + ty * DIM_X * 4] = res_A[1]; - sdata[tx + 2 * DIM_X + ty * DIM_X * 4] = res_A[2]; - sdata[tx + 3 * DIM_X + ty * DIM_X * 4] = res_A[3]; +template<> +struct CreateReal { + template + __device__ __host__ static inline hipDoubleComplex eval(const U& val) { + return hipDoubleComplex((double)val, 0.); + } +}; - __syncthreads(); - ind = hipBlockIdx_x * DIM_X * 4 + thread_id; - if 
(thread_id < DIM_X * 4) { - for (rocblas_int i = 1; i < DIM_Y; i++) - sdata[thread_id] += sdata[thread_id + DIM_X * 4 * i]; - if (ind < m) - C[ind + col_B * ldc] = alpha * sdata[thread_id] + beta * C[ind + col_B * ldc]; +/* + * Load matrix value + */ +// transposed +template +struct MatrixLoadGemm { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int row, + const rocblas_int col) + { + return M[col + row * ld]; } -} - -template -__global__ void gemmc_kernel(rocblas_int cols_AT, U alpha_device_host, const T* __restrict__ A, rocblas_int lda, - const T* __restrict__ B, rocblas_int ldb, U beta_device_host, T* C, rocblas_int ldc) -{ - auto alpha = load_scalar(alpha_device_host); - auto beta = load_scalar(beta_device_host); - rocblas_int tx = hipThreadIdx_x; +}; + +// Normal +template<> +struct MatrixLoadGemm { + template + __device__ static inline T eval(const T* M, const rocblas_int ld, const rocblas_int row, const rocblas_int col) + { + return M[row + col * ld]; + } +}; - if (tx < cols_AT) - A += tx; - rocblas_int col_A = hipBlockIdx_x; - rocblas_int col_B = hipBlockIdx_y; - A += col_A * lda; - B += col_B * MatrixDim::ld(ldb, 1); - T res(0); +template +struct MatrixRowsGemm { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. + return cols; + } +}; - __shared__ T sdata[NB_X]; +template <> +struct MatrixRowsGemm { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. + return rows; + } +}; + +template +struct MatrixColsGemm { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. + return rows; + } +}; - // partial sums - rocblas_int cols_AT_full = (cols_AT / NB_X) * NB_X; +template <> +struct MatrixColsGemm { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. 
+ return cols; + } +}; - for (rocblas_int i = 0; i < cols_AT_full; i += NB_X) - res += rb_port_conj_op::eval(A[i]) * rb_port_conj_op::eval(B[(tx + i) * MatrixDim::inc(ldb, 1)]); - if (tx + cols_AT_full < cols_AT) - res += rb_port_conj_op::eval(A[cols_AT_full]) * rb_port_conj_op::eval(B[(tx + cols_AT_full) * MatrixDim::inc(ldb, 1)]); +/* + * + */ +template +__global__ void gemm_kernel(rocblas_int m, rocblas_int n, rocblas_int k, U alpha_device_host, + const T* __restrict__ A, rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, + U beta_device_host, T* C, rocblas_int ldc) +{ - sdata[tx] = res; + const int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + if (tx >= m) + return; - // tree reduction of partial sums, - if (NB_X > 16) { - rocblas_sum_reduce(tx, sdata); - } else { - __syncthreads(); + const auto alpha = load_scalar(alpha_device_host); + const auto beta = load_scalar(beta_device_host); + const int row_C = tx; + const int col_C = hipBlockIdx_y; - if (tx == 0) { - for (rocblas_int i = 1; i < cols_AT && i < NB_X; i++) - sdata[0] += sdata[i]; - } + const int cols_A = k; - __syncthreads(); + T res = CreateReal::eval(0); + for (int col = 0; col < cols_A; ++col) { + res += ConjOp::eval(MatrixLoadGemm::eval(A, lda, row_C, col)) * + ConjOp::eval(MatrixLoadGemm::eval(B, ldb, col, col_C)); } - if (tx == 0) - C[col_A + col_B * ldc] = alpha * sdata[0] + beta * C[col_A + col_B * ldc]; + C[row_C + col_C * ldc] = alpha * res + beta * C[row_C + col_C * ldc]; } -template -constexpr char rocblas_gemm_name[] = "unknown"; -template <> -constexpr char rocblas_gemm_name[] = "rocblas_sgemm"; -template <> -constexpr char rocblas_gemm_name[] = "rocblas_dgemm"; - -/*! \brief BLAS Level 2 API - - \details - xGEMM performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or - y := alpha*A**T*x + beta*y, or - y := alpha*A**H*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. 
- - @param[in] - handle rocblas_handle. - handle to the rocblas library context queue. - @param[in] - trans rocblas_operation - @param[in] - m rocblas_int - @param[in] - n rocblas_int - @param[in] - alpha - specifies the scalar alpha. - @param[in] - A pointer storing matrix A on the GPU. - @param[in] - lda rocblas_int - specifies the leading dimension of A. - @param[in] - x pointer storing vector x on the GPU. - @param[in] - incx specifies the increment for the elements of x. - @param[in] - beta specifies the scalar beta. - @param[out] - y pointer storing vector y on the GPU. - @param[in] - incy rocblas_int - specifies the increment for the elements of y. - - ********************************************************************/ - -template -rocblas_status rocblas_gemm(rocblas_handle handle, rocblas_operation transa, rocblas_int m, - rocblas_int n, rocblas_int k, const T* alpha, const T* A, rocblas_int lda, const T* B, - rocblas_int ldb, const T* beta, T* C, rocblas_int ldc) +template +rocblas_status rocblas_gemm(rocblas_handle handle, rocblas_int m, rocblas_int n, rocblas_int k, const T* alpha, + const T* A, rocblas_int lda, const T* B, rocblas_int ldb, const T* beta, T* C, + rocblas_int ldc) { if (!handle) return rocblas_status_invalid_handle; - if (!alpha || !beta) + if (!alpha) return rocblas_status_invalid_pointer; if (!A || !B || !C) return rocblas_status_invalid_pointer; - if (m < 0 || n < 0 || k < 0 || lda < m || lda < 1 || ldb < k || ldb < 1 || ldc < m || ldc < 1) - return rocblas_status_invalid_size; - - /* - * Quick return if possible. 
Not Argument error - */ if (!m || !n || !k) return rocblas_status_success; hipStream_t rocblas_stream = handle->rocblas_stream; - if (transa == rocblas_operation_none) { - // GEMMN_DIM_Y must be at least 4, 8 * 8 is very slow only 40Gflop/s - static constexpr int GEMMN_DIM_X = 32; - static constexpr int GEMMN_DIM_Y = 16; - rocblas_int blocks = (m - 1) / (GEMMN_DIM_X * 4) + 1; - - dim3 gemmn_grid(blocks, n); - dim3 gemmn_threads(GEMMN_DIM_X, GEMMN_DIM_Y); - - if (handle->pointer_mode == rocblas_pointer_mode_device) { - hipLaunchKernelGGL((gemmn_kernel), gemmn_grid, gemmn_threads, 0, rocblas_stream, - m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - } else { - if (rb_port_cmp_and_real_only(*alpha, 0.0) && rb_port_cmp_and_real_only(*beta, 1)) - return rocblas_status_success; - - hipLaunchKernelGGL((gemmn_kernel), gemmn_grid, gemmn_threads, 0, rocblas_stream, - m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); - } + dim3 threads(256); + dim3 grid((m + threads.x - 1) / threads.x, n); + + if (handle->pointer_mode == rocblas_pointer_mode_device) { + hipLaunchKernelGGL(gemm_kernel, + grid, threads, 0, rocblas_stream, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } else { - // transpose - // number of columns on the y-dim of the grid, using gemmc because gemmt(transpose) is a - // instance of gemmc (conjugate) - static constexpr int NB = 256; - dim3 gemmc_grid(m, n); - dim3 gemmc_threads(NB); - - if (handle->pointer_mode == rocblas_pointer_mode_device) { - if (transa == rocblas_operation_transpose) - hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, gemmc_threads, 0, - rocblas_stream, k, alpha, A, lda, B, ldb, beta, C, ldc); - else - hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, - gemmc_threads, 0, rocblas_stream, k, alpha, A, lda, B, ldb, beta, C, ldc); - } else { - if (rb_port_cmp_and_real_only(*alpha, 0) && rb_port_cmp_and_real_only(*beta, 1)) - return rocblas_status_success; - - if (transa == rocblas_operation_transpose) - hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, 
gemmc_threads, 0, - rocblas_stream, k, *alpha, A, lda, B, ldb, *beta, C, ldc); - else - hipLaunchKernelGGL(gemmc_kernel, gemmc_grid, - gemmc_threads, 0, rocblas_stream, k, *alpha, A, lda, B, ldb, *beta, C, ldc); - } + if (rb_port_cmp_and_real_only(*alpha, 0)) + return rocblas_status_success; + hipLaunchKernelGGL(gemm_kernel, + grid, threads, 0, rocblas_stream, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); } return rocblas_status_success; } +template +rocblas_status rocblas_select_op_b(rocblas_handle handle, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const T* alpha, const T* A, + rocblas_int lda, const T* B, rocblas_int ldb, const T* beta, T* C, + rocblas_int ldc) +{ + if (transb == rocblas_operation_none) + return rocblas_gemm(handle, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + else if (transb == rocblas_operation_transpose) + return rocblas_gemm(handle, m, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); + else if (transb == rocblas_operation_conjugate_transpose) + return rocblas_gemm(handle, m, n, k, alpha, A, lda, B, ldb, + beta, C, ldc); + else + return rocblas_status_not_implemented; +} + +template +rocblas_status rocblas_select(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, + rocblas_int m, rocblas_int n, rocblas_int k, const T* alpha, const T* A, + rocblas_int lda, const T* B, rocblas_int ldb, const T* beta, T* C, + rocblas_int ldc) +{ + if (transa == rocblas_operation_none) + return rocblas_select_op_b(handle, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); + else if (transa == rocblas_operation_transpose) + return rocblas_select_op_b(handle, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); + else if (transa == rocblas_operation_conjugate_transpose) + return rocblas_select_op_b(handle, transb, m, n, k, alpha, A, lda, B, + ldb, beta, C, ldc); + else + return rocblas_status_not_implemented; +} + } // namespace /* @@ -344,14 +233,7 @@ rocblas_status 
rocblas_port_sgemm(rocblas_handle handle, rocblas_operation trans rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc) { - if (transb == rocblas_operation_none) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_conjugate_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else - return rocblas_status_not_implemented; + return rocblas_select(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } rocblas_status rocblas_port_dgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, @@ -359,14 +241,7 @@ rocblas_status rocblas_port_dgemm(rocblas_handle handle, rocblas_operation trans rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc) { - if (transb == rocblas_operation_none) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_conjugate_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else - return rocblas_status_not_implemented; + return rocblas_select(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } rocblas_status rocblas_port_cgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, @@ -374,14 +249,7 @@ rocblas_status rocblas_port_cgemm(rocblas_handle handle, rocblas_operation trans const hipFloatComplex* A, rocblas_int lda, const hipFloatComplex* B, rocblas_int ldb, const hipFloatComplex* beta, hipFloatComplex* C, rocblas_int ldc) { - if (transb == rocblas_operation_none) - return 
rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_conjugate_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else - return rocblas_status_not_implemented; + return rocblas_select(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } rocblas_status rocblas_port_zgemm(rocblas_handle handle, rocblas_operation transa, rocblas_operation transb, @@ -389,13 +257,6 @@ rocblas_status rocblas_port_zgemm(rocblas_handle handle, rocblas_operation trans const hipDoubleComplex* A, rocblas_int lda, const hipDoubleComplex* B, rocblas_int ldb, const hipDoubleComplex* beta, hipDoubleComplex* C, rocblas_int ldc) { - if (transb == rocblas_operation_none) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else if (transb == rocblas_operation_conjugate_transpose) - return rocblas_gemm(handle, transa, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); - else - return rocblas_status_not_implemented; + return rocblas_select(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } } // extern "C" diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp index 5051677a3..dac6acd0a 100644 --- a/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_gemv.hip.cpp @@ -173,10 +173,10 @@ __global__ void gemvc_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, rocblas_int m_full = (m / NB_X) * NB_X; for (rocblas_int i = 0; i < m_full; i += NB_X) - res += rb_port_conj_op::eval(A[i]) * x[(tx + i) * incx]; + res += ConjOp::eval(A[i]) * x[(tx + i) * 
incx]; if (tx + m_full < m) - res += rb_port_conj_op::eval(A[m_full]) * x[(tx + m_full) * incx]; + res += ConjOp::eval(A[m_full]) * x[(tx + m_full) * incx]; sdata[tx] = res; @@ -198,55 +198,6 @@ __global__ void gemvc_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, y[col * incy] = alpha * sdata[0] + beta * y[col * incy]; } -template -constexpr char rocblas_gemv_name[] = "unknown"; -template <> -constexpr char rocblas_gemv_name[] = "rocblas_sgemv"; -template <> -constexpr char rocblas_gemv_name[] = "rocblas_dgemv"; - -/*! \brief BLAS Level 2 API - - \details - xGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or - y := alpha*A**T*x + beta*y, or - y := alpha*A**H*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - @param[in] - handle rocblas_handle. - handle to the rocblas library context queue. - @param[in] - trans rocblas_operation - @param[in] - m rocblas_int - @param[in] - n rocblas_int - @param[in] - alpha - specifies the scalar alpha. - @param[in] - A pointer storing matrix A on the GPU. - @param[in] - lda rocblas_int - specifies the leading dimension of A. - @param[in] - x pointer storing vector x on the GPU. - @param[in] - incx specifies the increment for the elements of x. - @param[in] - beta specifies the scalar beta. - @param[out] - y pointer storing vector y on the GPU. - @param[in] - incy rocblas_int - specifies the increment for the elements of y. 
- - ********************************************************************/ template rocblas_status rocblas_gemv(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, diff --git a/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp b/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp index a90c95d75..47bae7b2a 100644 --- a/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp +++ b/src/SDDK/GPU/hipblas_port/rocblas_port_trmm.hip.cpp @@ -26,6 +26,7 @@ #include "rocblas_port/utility.h" #include "rocblas_port/reduction.h" #include "rocblas_port/port_helper_func.h" +#include namespace { @@ -194,310 +195,100 @@ struct MatrixLoad } }; -/* - * A*B and A*B^t and A*B^H - */ -template -__global__ void trmmn_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, - rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, T* C, rocblas_int ldc) -{ - auto alpha = load_scalar(alpha_device_host); - rocblas_int num_threads = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; - - if (DIM_X * DIM_Y != num_threads) - return; // need to launch exactly the same number of threads as template parameters indicate - - rocblas_int thread_id = hipThreadIdx_x + hipThreadIdx_y * hipBlockDim_x; - - // threads are all configurated locally - rocblas_int tx = thread_id % DIM_X; - rocblas_int ty = thread_id / DIM_X; - - rocblas_int ind; - - __shared__ T sdata[DIM_X * 4 * DIM_Y]; - - T res_A[4]; // micor tile is 4 * 4 - T res_B[4]; - - res_A[0] = res_B[0] = T(0.0); - res_A[1] = res_B[0] = T(0.0); - res_A[2] = res_B[0] = T(0.0); - res_A[3] = res_B[0] = T(0.0); - - ind = hipBlockIdx_x * DIM_X * 4 + tx; - - rocblas_int n_tail = n % (4 * DIM_Y); - rocblas_int col = ty * 4; - rocblas_int col_B = hipBlockIdx_y; - - // B += col_B * ldb; - for (col = ty * 4; col < (n - n_tail); col += 4 * DIM_Y) { - res_B[0] = MatrixLoad::eval(B, ldb, 1, col + 0, col_B); - res_B[1] = MatrixLoad::eval(B, ldb, 1, col + 1, col_B); - res_B[2] = MatrixLoad::eval(B, ldb, 1, col + 2, 
col_B); - res_B[3] = MatrixLoad::eval(B, ldb, 1, col + 3, col_B); - - res_B[0] = rb_port_conj_op::eval(res_B[0]); - res_B[1] = rb_port_conj_op::eval(res_B[1]); - res_B[2] = rb_port_conj_op::eval(res_B[2]); - res_B[3] = rb_port_conj_op::eval(res_B[3]); - - if (ind < m) { - res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 0)) * res_B[0]; - res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 1)) * res_B[1]; - res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 2)) * res_B[2]; - res_A[0] += MatrixLoad::eval(A, lda, 1, ind, (col + 3)) * res_B[3]; - } - - if (ind + DIM_X < m) { - res_A[1] += - MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 0)) * res_B[0]; - res_A[1] += - MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 1)) * res_B[1]; - res_A[1] += - MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 2)) * res_B[2]; - res_A[1] += - MatrixLoad::eval(A, lda, 1, ind + DIM_X, (col + 3)) * res_B[3]; - } - - if (ind + 2 * DIM_X < m) { - res_A[2] += - MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 0)) * - res_B[0]; - res_A[2] += - MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 1)) * - res_B[1]; - res_A[2] += - MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 2)) * - res_B[2]; - res_A[2] += - MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, (col + 3)) * - res_B[3]; - } - - if (ind + 3 * DIM_X < m) { - res_A[3] += - MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 0)) * - res_B[0]; - res_A[3] += - MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 1)) * - res_B[1]; - res_A[3] += - MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 2)) * - res_B[2]; - res_A[3] += - MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, (col + 3)) * - res_B[3]; - } +template +struct MatrixRows { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. 
+ return cols; } +}; - // if n is not multiple of (DIM_Y * 4) - if (n_tail > 0) { - res_B[0] = T(0); - res_B[1] = T(0); - res_B[2] = T(0); - res_B[3] = T(0); - - if (col + 0 < n) - res_B[0] = MatrixLoad::eval(B, ldb, 1, col + 0, col_B); - if (col + 1 < n) - res_B[1] = MatrixLoad::eval(B, ldb, 1, col + 1, col_B); - if (col + 2 < n) - res_B[2] = MatrixLoad::eval(B, ldb, 1, col + 2, col_B); - if (col + 3 < n) - res_B[3] = MatrixLoad::eval(B, ldb, 1, col + 3, col_B); - - res_B[0] = rb_port_conj_op::eval(res_B[0]); - res_B[1] = rb_port_conj_op::eval(res_B[1]); - res_B[2] = rb_port_conj_op::eval(res_B[2]); - res_B[3] = rb_port_conj_op::eval(res_B[3]); - - if (ind < m) { - res_A[0] += - MatrixLoad::eval(A, lda, 1, ind, (col + 0) * (col + 0 < n)) * - res_B[0]; - res_A[0] += - MatrixLoad::eval(A, lda, 1, ind, (col + 1) * (col + 1 < n)) * - res_B[1]; - res_A[0] += - MatrixLoad::eval(A, lda, 1, ind, (col + 2) * (col + 2 < n)) * - res_B[2]; - res_A[0] += - MatrixLoad::eval(A, lda, 1, ind, (col + 3) * (col + 3 < n)) * - res_B[3]; - } - - if (ind + DIM_X < m) { - res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, - (col + 0) * (col + 0 < n)) * - res_B[0]; - res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, - (col + 1) * (col + 1 < n)) * - res_B[1]; - res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, - (col + 2) * (col + 2 < n)) * - res_B[2]; - res_A[1] += MatrixLoad::eval(A, lda, 1, ind + DIM_X, - (col + 3) * (col + 3 < n)) * - res_B[3]; - } - - if (ind + 2 * DIM_X < m) { - res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, - (col + 0) * (col + 0 < n)) * - res_B[0]; - res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, - (col + 1) * (col + 1 < n)) * - res_B[1]; - res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, - (col + 2) * (col + 2 < n)) * - res_B[2]; - res_A[2] += MatrixLoad::eval(A, lda, 1, ind + 2 * DIM_X, - (col + 3) * (col + 3 < n)) * - res_B[3]; - } - - if (ind + 3 * DIM_X < m) { - res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, - (col 
+ 0) * (col + 0 < n)) * - res_B[0]; - res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, - (col + 1) * (col + 1 < n)) * - res_B[1]; - res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, - (col + 2) * (col + 2 < n)) * - res_B[2]; - res_A[3] += MatrixLoad::eval(A, lda, 1, ind + 3 * DIM_X, - (col + 3) * (col + 3 < n)) * - res_B[3]; - } +template <> +struct MatrixRows { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. + return rows; } +}; - sdata[tx + ty * DIM_X * 4] = res_A[0]; - sdata[tx + DIM_X + ty * DIM_X * 4] = res_A[1]; - sdata[tx + 2 * DIM_X + ty * DIM_X * 4] = res_A[2]; - sdata[tx + 3 * DIM_X + ty * DIM_X * 4] = res_A[3]; +template +struct MatrixCols { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. + return rows; + } +}; - __syncthreads(); +template <> +struct MatrixCols { + template + __host__ __device__ static inline T eval(const T rows, const U cols) { + // transpose or hermitian. 
+ return cols; + } +}; - ind = hipBlockIdx_x * DIM_X * 4 + thread_id; - if (thread_id < DIM_X * 4) { - for (rocblas_int i = 1; i < DIM_Y; i++) - sdata[thread_id] += sdata[thread_id + DIM_X * 4 * i]; +template +struct MatrixStore +{ + template + __device__ static inline T eval(T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col, const U& val) + { + return M[col * inc + row * ld] = val;; + } +}; - if (ind < m) - C[ind + col_B * ldc] = alpha * sdata[thread_id]; +template <> +struct MatrixStore +{ + template + __device__ static inline void eval(T* M, const rocblas_int ld, const rocblas_int inc, const rocblas_int row, + const rocblas_int col, const U& val) + { + M[row * inc + col * ld] = val; } -} +}; -template -__global__ void trmmc_kernel(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, - rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, T* C, rocblas_int ldc) +/* + * + */ +template +__global__ void trmmn_kernel_a_t_h(rocblas_int m, rocblas_int n, U alpha_device_host, const T* __restrict__ A, + rocblas_int lda, const T* __restrict__ B, rocblas_int ldb, T* C, rocblas_int ldc) { - auto alpha = load_scalar(alpha_device_host); - rocblas_int tx = hipThreadIdx_x; + const int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + if (tx >= m) + return; - // if (tx < m) - // A += tx; - rocblas_int tx_load_id = 0; - if (tx < m) - tx_load_id = tx; + const auto alpha = load_scalar(alpha_device_host); + const int row_op_C = MatrixRows::eval(tx, hipBlockIdx_y); + const int col_op_C = MatrixCols::eval(tx, hipBlockIdx_y); - rocblas_int col_A = hipBlockIdx_x; - rocblas_int col_B = hipBlockIdx_y; - // A += col_A * lda; - // B += col_B * ldb; + const int rows_B = MatrixRows::eval(m, n); T res(0); - - __shared__ T sdata[NB_X]; - - // partial sums - rocblas_int m_full = (m / NB_X) * NB_X; - - for (rocblas_int i = 0; i < m_full; i += NB_X) - res += - rb_port_conj_op::eval(MatrixLoad::eval(A, lda, 1, i + 
tx_load_id, col_A)) * - rb_port_conj_op::eval(MatrixLoad::eval(B, ldb, 1, (tx + i), col_B)); - - if (tx + m_full < m) - res += rb_port_conj_op::eval( - MatrixLoad::eval(A, lda, 1, m_full + tx_load_id, col_A)) * - rb_port_conj_op::eval(MatrixLoad::eval(B, ldb, 1, (tx + m_full), col_B)); - - sdata[tx] = res; - - // tree reduction of partial sums, - if (NB_X > 16) { - rocblas_sum_reduce(tx, sdata); - } else { - __syncthreads(); - - if (tx == 0) { - for (rocblas_int i = 1; i < m && i < NB_X; i++) - sdata[0] += sdata[i]; - } - - __syncthreads(); + for (int col = 0; col < rows_B; ++col) { + res += ConjOp::eval(MatrixLoad::eval(A, lda, 1, row_op_C, col)) * + MatrixLoad::eval(B, ldb, 1, col, col_op_C); } - if (tx == 0) - C[col_A + col_B * ldc] = alpha * sdata[0]; + MatrixStore::eval(C, ldc, 1, row_op_C, col_op_C, res*alpha); } -/*! \brief BLAS Level 2 API - - \details - xGEMM performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or - y := alpha*A**T*x + beta*y, or - y := alpha*A**H*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - @param[in] - handle rocblas_handle. - handle to the rocblas library context queue. - @param[in] - trans rocblas_operation - @param[in] - m rocblas_int - @param[in] - n rocblas_int - @param[in] - alpha - specifies the scalar alpha. - @param[in] - A pointer storing matrix A on the GPU. - @param[in] - lda rocblas_int - specifies the leading dimension of A. - @param[in] - x pointer storing vector x on the GPU. - @param[in] - incx specifies the increment for the elements of x. - @param[in] - beta specifies the scalar beta. - @param[out] - y pointer storing vector y on the GPU. - @param[in] - incy rocblas_int - specifies the increment for the elements of y. 
- - ********************************************************************/ - -template -rocblas_status rocblas_trmm(rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, - rocblas_int cols_B, const T* alpha, const T* A, rocblas_int lda, const T* B, - rocblas_int ldb, T* C, rocblas_int ldc) + + +template +rocblas_status rocblas_trmm(rocblas_handle handle, rocblas_int m, rocblas_int n, const T* alpha, const T* A, + rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) { if (!handle) return rocblas_status_invalid_handle; @@ -507,154 +298,58 @@ rocblas_status rocblas_trmm(rocblas_handle handle, rocblas_operation transa, roc if (!A || !B || !C) return rocblas_status_invalid_pointer; - if (rows_A < 0 || rows_B < 0 || cols_B < 0 || lda < rows_A || lda < 1 || ldb < rows_B || ldb < 1 || ldc < rows_A || ldc < 1) - return rocblas_status_invalid_size; - - /* - * Quick return if possible. Not Argument error - */ - if (!rows_A || !rows_B || !cols_B) + if (!m || !n) return rocblas_status_success; hipStream_t rocblas_stream = handle->rocblas_stream; - if (transa == rocblas_operation_none) { - // GEMMN_DIM_Y must be at least 4, 8 * 8 is very slow only 40Gflop/s - static constexpr int GEMMN_DIM_X = 32; - static constexpr int GEMMN_DIM_Y = 16; - rocblas_int blocks = (rows_A - 1) / (GEMMN_DIM_X * 4) + 1; - - dim3 trmmn_grid(blocks, cols_B); - dim3 trmmn_threads(GEMMN_DIM_X, GEMMN_DIM_Y); - - if (handle->pointer_mode == rocblas_pointer_mode_device) { - hipLaunchKernelGGL((trmmn_kernel), - trmmn_grid, trmmn_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); - } else { - if (rb_port_cmp_and_real_only(*alpha, 0.0)) - return rocblas_status_success; + dim3 threads(256); + dim3 grid((m + threads.x - 1) / threads.x, n); - hipLaunchKernelGGL((trmmn_kernel), - trmmn_grid, trmmn_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); - } + if (handle->pointer_mode == rocblas_pointer_mode_device) { + 
hipLaunchKernelGGL(trmmn_kernel_a_t_h, + grid, threads, 0, rocblas_stream, m, n, alpha, A, lda, B, ldb, C, ldc); } else { - // transpose - // number of columns on the y-dim of the grid, using trmmc because trmmt(transpose) is a - // instance of trmmc (conjugate) - static constexpr int NB = 256; - dim3 trmmc_grid(rows_B, cols_B); - dim3 trmmc_threads(NB); - - if (handle->pointer_mode == rocblas_pointer_mode_device) { - if (transa == rocblas_operation_transpose) - hipLaunchKernelGGL(trmmc_kernel, - trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); - else - hipLaunchKernelGGL( - trmmc_kernel, - trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, alpha, A, lda, B, ldb, C, ldc); - } else { - if (rb_port_cmp_and_real_only(*alpha, 0)) - return rocblas_status_success; - - if (transa == rocblas_operation_transpose) - hipLaunchKernelGGL(trmmc_kernel, - trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); - else - hipLaunchKernelGGL( - trmmc_kernel, - trmmc_grid, trmmc_threads, 0, rocblas_stream, rows_A, rows_B, *alpha, A, lda, B, ldb, C, ldc); - } + if (rb_port_cmp_and_real_only(*alpha, 0)) + return rocblas_status_success; + hipLaunchKernelGGL(trmmn_kernel_a_t_h, + grid, threads, 0, rocblas_stream, m, n, *alpha, A, lda, B, ldb, C, ldc); } return rocblas_status_success; } -/* - * =========================================================================== - * Helper functions to select template parameters - * =========================================================================== - */ -template -rocblas_status rocblas_trmm_select_op_b( - std::tuple templ_param, - rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, - const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) -{ - if (std::get<4>(templ_param) == rocblas_operation_none) { - return rocblas_trmm( - handle, transa, rows_A, 
rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } else if (std::get<4>(templ_param) == rocblas_operation_transpose) { - return rocblas_trmm( - handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } else { - return rocblas_trmm( - handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } -} - -template -rocblas_status rocblas_trmm_select_diag2( - std::tuple templ_param, - rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, - const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) -{ - if (std::get<3>(templ_param) == rocblas_diagonal_unit) { - return rocblas_trmm_select_op_b( - templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } else { - return rocblas_trmm_select_op_b( - templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } -} - -template -rocblas_status rocblas_trmm_select_diag1( - std::tuple templ_param, - rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, - const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) -{ - if (std::get<2>(templ_param) == rocblas_diagonal_unit) { - return rocblas_trmm_select_diag2( - templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } else { - return rocblas_trmm_select_diag2( - templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); - } -} - -template -rocblas_status rocblas_trmm_select_fill2( - std::tuple templ_param, - rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, - const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +template +rocblas_status rocblas_trmm_select_diag(rocblas_handle handle, rocblas_diagonal diag, rocblas_int m, rocblas_int n, + const T* alpha, 
const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, + rocblas_int ldc) { - if (std::get<1>(templ_param) == rocblas_fill_lower) { - return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, - cols_B, alpha, A, lda, B, ldb, C, ldc); - } else if (std::get<1>(templ_param) == rocblas_fill_upper) { - return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, - cols_B, alpha, A, lda, B, ldb, C, ldc); + if (diag == rocblas_diagonal_unit) { + return rocblas_trmm(handle, m, n, alpha, A, lda, + B, ldb, C, ldc); } else { - return rocblas_trmm_select_diag1(templ_param, handle, transa, rows_A, rows_B, cols_B, - alpha, A, lda, B, ldb, C, ldc); + return rocblas_trmm(handle, m, n, alpha, A, + lda, B, ldb, C, ldc); } } -template -rocblas_status rocblas_trmm_select_fill1( - std::tuple templ_param, - rocblas_handle handle, rocblas_operation transa, rocblas_int rows_A, rocblas_int rows_B, rocblas_int cols_B, - const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) +template +rocblas_status rocblas_trmm_select_fill(rocblas_handle handle, rocblas_fill uplo, rocblas_diagonal diag, rocblas_int m, + rocblas_int n, const T* alpha, const T* A, rocblas_int lda, const T* B, + rocblas_int ldb, T* C, rocblas_int ldc) { - if (std::get<0>(templ_param) == rocblas_fill_lower) { - return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, - A, lda, B, ldb, C, ldc); - } else if (std::get<0>(templ_param) == rocblas_fill_upper) { - return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, rows_B, cols_B, alpha, - A, lda, B, ldb, C, ldc); + if (uplo == rocblas_fill_lower) { + return rocblas_trmm_select_diag(handle, diag, m, n, alpha, + A, lda, B, ldb, C, ldc); + } else if (uplo == rocblas_fill_upper) { + return rocblas_trmm_select_diag(handle, diag, m, n, alpha, + A, lda, B, ldb, C, ldc); } else { - return rocblas_trmm_select_fill2(templ_param, handle, transa, rows_A, 
rows_B, cols_B, alpha, - A, lda, B, ldb, C, ldc); + return rocblas_trmm_select_diag(handle, diag, m, n, alpha, + A, lda, B, ldb, C, ldc); } } @@ -663,40 +358,39 @@ rocblas_status rocblas_trmm_select(rocblas_handle handle, rocblas_side side, roc rocblas_diagonal diag, rocblas_int m, rocblas_int n, const T* alpha, const T* A, rocblas_int lda, const T* B, rocblas_int ldb, T* C, rocblas_int ldc) { - rocblas_operation transA, transB; - rocblas_fill uploA, uploB; - rocblas_diagonal diagA, diagB; - rocblas_int rows_A, rows_B, cols_B; - - // create parameters according to multiplication order - if (side == rocblas_side_right) { - rows_A = m; - rows_B = n; - cols_B = n; - std::swap(A, B); - std::swap(lda, ldb); - transA = rocblas_operation_none; - transB = trans; - uploA = rocblas_fill_full; - uploB = uplo; - diagA = rocblas_diagonal_non_unit; - diagB = diag; + if (side == rocblas_side_left) { + if (trans == rocblas_operation_none) { + return rocblas_trmm_select_fill(handle, uplo, diag, m, n, alpha, A, lda, B, ldb, C, + ldc); + } else if (trans == rocblas_operation_transpose) { + return rocblas_trmm_select_fill(handle, uplo, diag, m, n, alpha, A, lda, B, ldb, C, + ldc); + } else { + return rocblas_trmm_select_fill(handle, uplo, diag, m, n, alpha, A, lda, B, ldb, C, + ldc); + } } else { - rows_A = m; - rows_B = m; - cols_B = n; - transB = rocblas_operation_none; - transA = trans; - uploB= rocblas_fill_full; - uploA = uplo; - diagB = rocblas_diagonal_non_unit; - diagA = diag; - + // Use the following identities: + // B*A = (AT*BT)T + // B*AT = (A*BT)T + if (trans == rocblas_operation_none) { + return rocblas_trmm_select_fill( + handle, uplo, diag, m, n, alpha, A, lda, B, ldb, C, ldc); + } else if (trans == rocblas_operation_transpose) { + return rocblas_trmm_select_fill(handle, uplo, diag, m, n, alpha, A, lda, B, + ldb, C, ldc); + } else { + return rocblas_trmm_select_fill( + handle, uplo, diag, m, n, alpha, A, lda, B, ldb, C, ldc); + } } - - auto templ_param = 
std::make_tuple(uploA, uploB, diagA, diagB, transB); - - return rocblas_trmm_select_fill1(templ_param, handle, transA, rows_A, rows_B, cols_B, alpha, A, lda, B, ldb, C, ldc); } } // namespace diff --git a/src/SDDK/GPU/hipblas_port/tests/axpy_test.cpp b/src/SDDK/GPU/hipblas_port/tests/axpy_test.cpp new file mode 100644 index 000000000..e324088c3 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/axpy_test.cpp @@ -0,0 +1,205 @@ +#ifdef __CUDA +#include +#include +#include +#else +#include +#include +#include +#include "hipblas_port.h" +#endif + + +#include +#include "gtest/gtest.h" +// #define CATCH_CONFIG_MAIN +// #include "catch.hpp" + +using testing::Types; + +#ifdef __CUDA +#define GPU_PREFIX(val) cuda##val +#else +#define GPU_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define BLAS_PREFIX(val) cu##val +#else +#define BLAS_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define GPU_PREFIX_CAPS(val) CU##val +#else +#define GPU_PREFIX_CAPS(val) HIP##val +#endif + +template +struct create_real { + template + static inline T eval(const U& val) { + return T(val); + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(FloatComplex) eval(const U& val) { + BLAS_PREFIX(FloatComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(DoubleComplex) eval(const U& val) { + BLAS_PREFIX(DoubleComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template +struct create_complex +{ + template + static inline T eval(const U1& val1, const U2& val2) + { + T c; + c.x = val1; + c.y = val2; + return c; + } +}; + +template +inline double get_real_double(const T& val) { + return double(val); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(FloatComplex)& val) { + return double(val.x); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(DoubleComplex)& val) { + return double(val.x); +} + +inline BLAS_PREFIX(blasStatus_t) 
call_axpy(BLAS_PREFIX(blasHandle_t) handle, int n, const float* alpha, const float* x, int incx, + float* y, int incy) +{ + +#ifdef __CUDA + return cublasSaxpy(handle, n, alpha, x, incx, y, incy); +#else + return BLAS_PREFIX(blas_port_Saxpy)(handle, n, alpha, x, incx, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_axpy(BLAS_PREFIX(blasHandle_t) handle, int n, const double* alpha, const double* x, int incx, + double* y, int incy) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasDaxpy)(handle, n, alpha, x, incx, y, incy); +#else + return BLAS_PREFIX(blas_port_Daxpy)(handle, n, alpha, x, incx, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_axpy(BLAS_PREFIX(blasHandle_t) handle, int n, const BLAS_PREFIX(FloatComplex)* alpha, + const BLAS_PREFIX(FloatComplex)* x, int incx, BLAS_PREFIX(FloatComplex)* y, int incy) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasCaxpy)(handle, n, alpha, x, incx, y, incy); +#else + return BLAS_PREFIX(blas_port_Caxpy)(handle, n, alpha, x, incx, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_axpy(BLAS_PREFIX(blasHandle_t) handle, int n, const BLAS_PREFIX(DoubleComplex)* alpha, + const BLAS_PREFIX(DoubleComplex)* x, int incx, BLAS_PREFIX(DoubleComplex)* y, int incy) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasZaxpy)(handle, n, alpha, x, incx, y, incy); +#else + return BLAS_PREFIX(blas_port_Zaxpy)(handle, n, alpha, x, incx, y, incy); +#endif +} + +template +class AxpyTest : public ::testing::Test +{ + protected: + void SetUp() override + { + + /* 1 + * x = * + * 2 + * * + */ + x = {create_real::eval(1), create_real::eval(-10000), create_real::eval(2), + create_real::eval(-10000)}; + GPU_PREFIX(Malloc)((void**)&x_device, x.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(x_device, x.data(), x.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 7 + * y = * + * 8 + * * + */ + y = {create_real::eval(7), create_real::eval(-10000), create_real::eval(8), + create_real::eval(-10000)}; + 
GPU_PREFIX(Malloc)((void**)&y_device, y.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(y_device, y.data(), y.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(2); + + y_result.resize(y.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(x_device); + GPU_PREFIX(Free)(y_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector x, y, y_result; + T* x_device; + T* y_device; + T alpha; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + + + +typedef Types AxpyValueTypes; + +TYPED_TEST_CASE(AxpyTest, AxpyValueTypes); + +TYPED_TEST(AxpyTest, Strided) { + BLAS_PREFIX(blasStatus_t) status = + call_axpy(this->handle, 2, &(this->alpha), this->x_device, 2, this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[0]), 9.); + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[2]), 12.); + +} diff --git a/src/SDDK/GPU/hipblas_port/tests/gemm_test.cpp b/src/SDDK/GPU/hipblas_port/tests/gemm_test.cpp new file mode 100644 index 000000000..5223d87fd --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/gemm_test.cpp @@ -0,0 +1,769 @@ +#ifdef __CUDA +#include +#include +#include +#else +#include +#include +#include +#include "hipblas_port.h" +#endif +#include +#include "gtest/gtest.h" +// #define CATCH_CONFIG_MAIN +// #include "catch.hpp" + +using testing::Types; +#ifdef __CUDA +#define GPU_PREFIX(val) cuda##val +#else +#define GPU_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define BLAS_PREFIX(val) cu##val +#else +#define BLAS_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define GPU_PREFIX_CAPS(val) CU##val +#else +#define GPU_PREFIX_CAPS(val) HIP##val +#endif + +template +struct create_real { + template + static inline T eval(const U& val) { + return 
T(val); + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(FloatComplex) eval(const U& val) { + BLAS_PREFIX(FloatComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(DoubleComplex) eval(const U& val) { + BLAS_PREFIX(DoubleComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template +struct create_complex +{ + template + static inline T eval(const U1& val1, const U2& val2) + { + T c; + c.x = val1; + c.y = val2; + return c; + } +}; + +template +inline double get_real_double(const T& val) { + return double(val); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(FloatComplex)& val) { + return double(val.x); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(DoubleComplex)& val) { + return double(val.x); +} + +inline BLAS_PREFIX(blasStatus_t) call_gemm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) transa, BLAS_PREFIX(blasOperation_t) transb, + int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, + int ldb, const float* beta, float* C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasSgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#else + return BLAS_PREFIX(blas_port_Sgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + call_gemm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) transa, + BLAS_PREFIX(blasOperation_t) transb, int m, int n, int k, const double* alpha, const double* A, int lda, + const double* B, int ldb, const double* beta, double* C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasDgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#else + return BLAS_PREFIX(blas_port_Dgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + 
call_gemm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) transa, + BLAS_PREFIX(blasOperation_t) transb, int m, int n, int k, const BLAS_PREFIX(FloatComplex) * alpha, + const BLAS_PREFIX(FloatComplex) * A, int lda, const BLAS_PREFIX(FloatComplex) * B, int ldb, + const BLAS_PREFIX(FloatComplex) * beta, BLAS_PREFIX(FloatComplex) * C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasCgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#else + return BLAS_PREFIX(blas_port_Cgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + call_gemm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) transa, + BLAS_PREFIX(blasOperation_t) transb, int m, int n, int k, const BLAS_PREFIX(DoubleComplex) * alpha, + const BLAS_PREFIX(DoubleComplex) * A, int lda, const BLAS_PREFIX(DoubleComplex) * B, int ldb, + const BLAS_PREFIX(DoubleComplex) * beta, BLAS_PREFIX(DoubleComplex) * C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasZgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#else + return BLAS_PREFIX(blas_port_Zgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +#endif +} + +template +class GemmRealTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 + * A = 2 4 + * * * + */ + A = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * B = 2 4 + * * * + */ + B = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), 
B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * C = 2 4 + * * * + */ + C = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(2); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +typedef Types GemmRealTypes; + +TYPED_TEST_CASE(GemmRealTest, GemmRealTypes); + +TYPED_TEST(GemmRealTest, AN_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_N), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 9.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 21.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 30.); +} + +TYPED_TEST(GemmRealTest, AT_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_OP_N), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, 
&(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 17.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 33.); +} + +TYPED_TEST(GemmRealTest, AN_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_T), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 12.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 18.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 20.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 28.); +} + +TYPED_TEST(GemmRealTest, AT_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_OP_T), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + 
GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 9.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 19.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 16.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 30.); +} + +TYPED_TEST(GemmRealTest, AC_BC) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_C), GPU_PREFIX_CAPS(BLAS_OP_C), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 9.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 19.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 16.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 30.); +} + +template +class GemmComplexTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 + * A = 2 4 + * * * + */ + A = {create_complex::eval(1, 1), create_complex::eval(1, 2), create_complex::eval(1, -10000), + create_complex::eval(1, 3), create_complex::eval(1, 4), create_complex::eval(1, -20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * B = 2 4 + * * * + */ + B = {create_complex::eval(1, 1), create_complex::eval(1, 2), create_complex::eval(1, -10000), + create_complex::eval(1, 3), create_complex::eval(1, 4), create_complex::eval(1, -20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * C = 2 4 + 
* * * + */ + C = {create_complex::eval(1, 1), create_complex::eval(1, 2), create_complex::eval(1, -10000), + create_complex::eval(1, 3), create_complex::eval(1, 4), create_complex::eval(1, -20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_complex::eval(1, 0); + beta = create_complex::eval(2, 0); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +typedef Types GemmComplexTypes; + +TYPED_TEST_CASE(GemmComplexTest, GemmComplexTypes); + +TYPED_TEST(GemmComplexTest, AN_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_N), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, -3.); + EXPECT_DOUBLE_EQ((this->C_result[1]).x, -6.); + EXPECT_DOUBLE_EQ((this->C_result[3]).x, -11.); + EXPECT_DOUBLE_EQ((this->C_result[4]).x, -18.); + + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 9.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 13.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 17.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 21.); +} + +TYPED_TEST(GemmComplexTest, AT_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, 
GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_OP_N), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, -1.); + EXPECT_DOUBLE_EQ((this->C_result[1]).x, -7.); + EXPECT_DOUBLE_EQ((this->C_result[3]).x, -7.); + EXPECT_DOUBLE_EQ((this->C_result[4]).x, -21.); + + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 8.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 14.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 16.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 22.); +} + +TYPED_TEST(GemmComplexTest, AN_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_T), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, -6.); + EXPECT_DOUBLE_EQ((this->C_result[1]).x, -10.); + EXPECT_DOUBLE_EQ((this->C_result[3]).x, -10.); + EXPECT_DOUBLE_EQ((this->C_result[4]).x, -16.); + + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 10.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 14.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 16.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 20.); +} + +TYPED_TEST(GemmComplexTest, AT_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), 
GPU_PREFIX_CAPS(BLAS_OP_T), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, -3.); + EXPECT_DOUBLE_EQ((this->C_result[1]).x, -11.); + EXPECT_DOUBLE_EQ((this->C_result[3]).x, -6.); + EXPECT_DOUBLE_EQ((this->C_result[4]).x, -18.); + + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 9.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 15.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 15.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 21.); +} + +TYPED_TEST(GemmComplexTest, AC_BC) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_C), GPU_PREFIX_CAPS(BLAS_OP_C), 2, 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, -3.); + EXPECT_DOUBLE_EQ((this->C_result[1]).x, -11.); + EXPECT_DOUBLE_EQ((this->C_result[3]).x, -6.); + EXPECT_DOUBLE_EQ((this->C_result[4]).x, -18.); + + EXPECT_DOUBLE_EQ((this->C_result[0]).y, -5.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, -7.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, -3.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, -5.); +} + +/* + * Non-squared test + */ + +template +class GemmRealNonSquaredTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 5 + * A = 2 4 6 + * * * * + */ + A 
= {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-10000), + create_real::eval(5), create_real::eval(6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 4 + * B = 2 5 + * 3 6 + * * * + */ + B = {create_real::eval(1), create_real::eval(2), create_real::eval(3), create_real::eval(-10000), + create_real::eval(4), create_real::eval(5), create_real::eval(6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* * * * + * C = * * * + * * * * + */ + C = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(0); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +TYPED_TEST_CASE(GemmRealNonSquaredTest, GemmRealTypes); + +TYPED_TEST(GemmRealNonSquaredTest, AN_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_N), 2, 2, 3, &(this->alpha), + this->A_device, 3, this->B_device, 4, &(this->beta), this->C_device, 3); + 
ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 22.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 28.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 49.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 64.); +} + +TYPED_TEST(GemmRealNonSquaredTest, AT_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_OP_T), 3, 3, 2, &(this->alpha), + this->A_device, 3, this->B_device, 4, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 9.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 19.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[2]), 29.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 12.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 26.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[5]), 40.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[6]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[7]), 33.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[8]), 51.); +} + +/* + * Non-squared test 2 + */ + +template +class GemmRealNonSquaredTest2 : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 5 + * A = 2 4 6 + * * * * + */ + A = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), 
create_real::eval(4), create_real::eval(-10000), + create_real::eval(5), create_real::eval(6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 5 + * B = 2 4 6 + */ + B = {create_real::eval(1), create_real::eval(2), create_real::eval(3), + create_real::eval(4), create_real::eval(5), create_real::eval(6)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* * * * + * C = * * * + * * * * + */ + C.resize(9, create_real::eval(-1000)); + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(0); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +TYPED_TEST_CASE(GemmRealNonSquaredTest2, GemmRealTypes); + +TYPED_TEST(GemmRealNonSquaredTest2, AN_BT) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_T), 2, 2, 3, &(this->alpha), + this->A_device, 3, this->B_device, 2, &(this->beta), this->C_device, 2); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + 
EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 35.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 44.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[2]), 44.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 56.); +} + +TYPED_TEST(GemmRealNonSquaredTest2, AT_BN) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_OP_N), 3, 3, 2, &(this->alpha), + this->A_device, 3, this->B_device, 2, &(this->beta), this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 5.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[2]), 17.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 25.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[5]), 39.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[6]), 17.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[7]), 39.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[8]), 61.); +} + +template +class GemmSiriusTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* + * B * A + */ + m = 412; + n = 15; + k = 15; + lda = m; + ldb = k; + ldc = m; + size_A = k * lda; + size_B = n * ldb; + size_C = n * ldc; + + A.resize(size_A, create_real::eval(-1000)); + B.resize(size_B, create_real::eval(-1000)); + C.resize(size_C, create_real::eval(-1000)); + + for (int i = 0; i < size_A; ++i) { + A[i] = create_real::eval(i + 1); + } + + for (int i = 0; i < size_B; ++i) { + B[i] = create_real::eval(i + 1); + } + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + 
GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(0); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + int m, n, k, ldb, lda, ldc, size_B, size_A, size_C; + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha; + T beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +TYPED_TEST_CASE(GemmSiriusTest, GemmRealTypes); + +TYPED_TEST(GemmSiriusTest, SIRIUS_N_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_gemm(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_OP_N), this->m, this->n, this->k, + &(this->alpha), this->A_device, this->lda, this->B_device, this->ldb, &(this->beta), + this->C_device, this->ldc); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 461560.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[411]), 510880.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[414]), 1111375.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[825]), 1760380.); +} + diff --git a/src/SDDK/GPU/hipblas_port/tests/gemv_test.cpp b/src/SDDK/GPU/hipblas_port/tests/gemv_test.cpp 
new file mode 100644 index 000000000..2cf75b12a --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/gemv_test.cpp @@ -0,0 +1,341 @@ +#ifdef __CUDA +#include +#include +#include +#else +#include +#include +#include +#include "hipblas_port.h" +#endif +#include +#include "gtest/gtest.h" +// #define CATCH_CONFIG_MAIN +// #include "catch.hpp" + +using testing::Types; +#ifdef __CUDA +#define GPU_PREFIX(val) cuda##val +#else +#define GPU_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define BLAS_PREFIX(val) cu##val +#else +#define BLAS_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define GPU_PREFIX_CAPS(val) CU##val +#else +#define GPU_PREFIX_CAPS(val) HIP##val +#endif + +template +struct create_real { + template + static inline T eval(const U& val) { + return T(val); + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(FloatComplex) eval(const U& val) { + BLAS_PREFIX(FloatComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(DoubleComplex) eval(const U& val) { + BLAS_PREFIX(DoubleComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template +struct create_complex +{ + template + static inline T eval(const U1& val1, const U2& val2) + { + T c; + c.x = val1; + c.y = val2; + return c; + } +}; + +template +inline double get_real_double(const T& val) { + return double(val); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(FloatComplex)& val) { + return double(val.x); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(DoubleComplex)& val) { + return double(val.x); +} + +inline BLAS_PREFIX(blasStatus_t) call_gemv(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) trans, int m, int n, const float* alpha, + const float* A, int lda, const float* x, int incx, const float* beta, float* y, + int incy) { +#ifdef __CUDA + return BLAS_PREFIX(blasSgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#else + return 
BLAS_PREFIX(blas_port_Sgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_gemv(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) trans, int m, int n, const double* alpha, + const double* A, int lda, const double* x, int incx, const double* beta, double* y, + int incy) { +#ifdef __CUDA + return BLAS_PREFIX(blasDgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#else + return BLAS_PREFIX(blas_port_Dgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_gemv(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) trans, int m, int n, + const BLAS_PREFIX(FloatComplex)* alpha, const BLAS_PREFIX(FloatComplex)* A, int lda, + const BLAS_PREFIX(FloatComplex)* x, int incx, const BLAS_PREFIX(FloatComplex)* beta, BLAS_PREFIX(FloatComplex)* y, + int incy) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasCgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#else + return BLAS_PREFIX(blas_port_Cgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_gemv(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasOperation_t) trans, int m, int n, + const BLAS_PREFIX(DoubleComplex)* alpha, const BLAS_PREFIX(DoubleComplex)* A, int lda, + const BLAS_PREFIX(DoubleComplex)* x, int incx, const BLAS_PREFIX(DoubleComplex)* beta, BLAS_PREFIX(DoubleComplex)* y, + int incy) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasZgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#else + return BLAS_PREFIX(blas_port_Zgemv)(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +#endif +} + +template +class GemvRealTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 4 + * A = 2 5 + * 3 6 + * * * + */ + A = {create_real::eval(1), create_real::eval(2), create_real::eval(3), + create_real::eval(-10000), create_real::eval(4), 
create_real::eval(5), + create_real::eval(6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 + * x = * + * 2 + * * + * 3 + * * + */ + x = {create_real::eval(1), create_real::eval(-10000), + create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(-10000)}; + GPU_PREFIX(Malloc)((void**)&x_device, x.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(x_device, x.data(), x.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 + * y = * + * 1 + * * + * 1 + * * + */ + y = {create_real::eval(1), create_real::eval(-10000), + create_real::eval(1), create_real::eval(-10000), + create_real::eval(1), create_real::eval(-10000)}; + GPU_PREFIX(Malloc)((void**)&y_device, y.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(y_device, y.data(), y.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(2); + + y_result.resize(y.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(x_device); + GPU_PREFIX(Free)(y_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, x, y, y_result; + T* A_device; + T* x_device; + T* y_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + + + +typedef Types GemvValueTypes; + +TYPED_TEST_CASE(GemvRealTest, GemvValueTypes); + +TYPED_TEST(GemvRealTest, OP_NONE) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), 3, 2, &(this->alpha), this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + 
EXPECT_DOUBLE_EQ(get_real_double(this->y_result[0]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[2]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[4]), 17.); +} + +TYPED_TEST(GemvRealTest, OP_T) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), 3, 2, &(this->alpha), this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[0]), 16.); + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[2]), 34.); +} + +TYPED_TEST(GemvRealTest, OP_C) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_C), 3, 2, &(this->alpha), this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[0]), 16.); + EXPECT_DOUBLE_EQ(get_real_double(this->y_result[2]), 34.); +} + + +template +class GemvComplexTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 4 + * A = 2 5 + * 3 6 + * * * + */ + A = {create_complex::eval(1, 1), create_complex::eval(1, 2), create_complex::eval(1, 3), create_complex::eval(1, -10000), create_complex::eval(1, 4), create_complex::eval(1, 5), create_complex::eval(1, 6), create_complex::eval(1, -20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 + * x = * + * 2 + * * + * 3 + * * + */ + x = {create_complex::eval(1, 1), create_complex::eval(1, -10000), 
create_complex::eval(1, 2), create_complex::eval(1, -10000), create_complex::eval(1, 3), create_complex::eval(1, -10000)}; + GPU_PREFIX(Malloc)((void**)&x_device, x.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(x_device, x.data(), x.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 + * y = * + * 1 + * * + * 1 + * * + */ + y = {create_complex::eval(1, 1), create_complex::eval(1, -10000), create_complex::eval(1, 1), create_complex::eval(1, -10000), create_complex::eval(1, 1), create_complex::eval(1, -10000)}; + GPU_PREFIX(Malloc)((void**)&y_device, y.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(y_device, y.data(), y.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + beta = create_real::eval(2); + + y_result.resize(y.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(x_device); + GPU_PREFIX(Free)(y_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, x, y, y_result; + T* A_device; + T* x_device; + T* y_device; + T alpha, beta; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +typedef Types GemvComplexValueTypes; + +TYPED_TEST_CASE(GemvComplexTest, GemvComplexValueTypes); + +TYPED_TEST(GemvComplexTest, OP_NONE) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_N), 3, 2, &(this->alpha), this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->y_result[0]).y, 10.); + EXPECT_DOUBLE_EQ((this->y_result[2]).y, 12.); + EXPECT_DOUBLE_EQ((this->y_result[4]).y, 14.); +} + +TYPED_TEST(GemvComplexTest, OP_T) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_T), 3, 2, &(this->alpha), 
this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->y_result[0]).y, 14.); + EXPECT_DOUBLE_EQ((this->y_result[2]).y, 23.); +} + +TYPED_TEST(GemvComplexTest, OP_C) { + BLAS_PREFIX(blasStatus_t) status = + call_gemv(this->handle, GPU_PREFIX_CAPS(BLAS_OP_C), 3, 2, &(this->alpha), this->A_device, 4, this->x_device, 2, &(this->beta), this->y_device, 2); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->y_result.data(), this->y_device, this->y_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->y_result[0]).y, 2.); + EXPECT_DOUBLE_EQ((this->y_result[2]).y, -7.); +} + + diff --git a/src/SDDK/GPU/hipblas_port/tests/ger_test.cpp b/src/SDDK/GPU/hipblas_port/tests/ger_test.cpp new file mode 100644 index 000000000..c47e8c247 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/ger_test.cpp @@ -0,0 +1,221 @@ +#ifdef __CUDA +#include +#include +#include +#else +#include +#include +#include +#include "hipblas_port.h" +#endif +#include +#include "gtest/gtest.h" +// #define CATCH_CONFIG_MAIN +// #include "catch.hpp" + +using testing::Types; +#ifdef __CUDA +#define GPU_PREFIX(val) cuda##val +#else +#define GPU_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define BLAS_PREFIX(val) cu##val +#else +#define BLAS_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define GPU_PREFIX_CAPS(val) CU##val +#else +#define GPU_PREFIX_CAPS(val) HIP##val +#endif + +template +struct create_real { + template + static inline T eval(const U& val) { + return T(val); + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(FloatComplex) eval(const U& val) { + BLAS_PREFIX(FloatComplex) c; + c.x = val; + c.y = 0; 
+ return c; + } +}; + +template<> +struct create_real { + template + static inline BLAS_PREFIX(DoubleComplex) eval(const U& val) { + BLAS_PREFIX(DoubleComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template +struct create_complex +{ + template + static inline T eval(const U1& val1, const U2& val2) + { + T c; + c.x = val1; + c.y = val2; + return c; + } +}; + +template +inline double get_real_double(const T& val) { + return double(val); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(FloatComplex)& val) { + return double(val.x); +} + +template<> +inline double get_real_double(const BLAS_PREFIX(DoubleComplex)& val) { + return double(val.x); +} + +inline BLAS_PREFIX(blasStatus_t) call_ger(BLAS_PREFIX(blasHandle_t) handle, int m, int n, const float* alpha, const float* x, int incx, + const float* y, int incy, float* A, int lda) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasSger)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#else + return BLAS_PREFIX(blas_port_Sger)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_ger(BLAS_PREFIX(blasHandle_t) handle, int m, int n, const double* alpha, const double* x, + int incx, const double* y, int incy, double* A, int lda) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasDger)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#else + return BLAS_PREFIX(blas_port_Dger)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) call_ger(BLAS_PREFIX(blasHandle_t) handle, int m, int n, const BLAS_PREFIX(FloatComplex)* alpha, + const BLAS_PREFIX(FloatComplex)* x, int incx, const BLAS_PREFIX(FloatComplex)* y, int incy, + BLAS_PREFIX(FloatComplex)* A, int lda) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasCgeru)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#else + return BLAS_PREFIX(blas_port_Cgeru)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) 
call_ger(BLAS_PREFIX(blasHandle_t) handle, int m, int n, const BLAS_PREFIX(DoubleComplex)* alpha, + const BLAS_PREFIX(DoubleComplex)* x, int incx, const BLAS_PREFIX(DoubleComplex)* y, int incy, + BLAS_PREFIX(DoubleComplex)* A, int lda) +{ + +#ifdef __CUDA + return BLAS_PREFIX(blasZgeru)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#else + return BLAS_PREFIX(blas_port_Zgeru)(handle, m, n, alpha, x, incx, y, incy, A, lda); +#endif +} + +template +class GerTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 4 + * A = 2 5 + * 3 6 + * * * + */ + A = {create_real::eval(1), create_real::eval(2), create_real::eval(3), create_real::eval(-10000), + create_real::eval(4), create_real::eval(5), create_real::eval(6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 + * x = * + * 2 + * * + * 3 + * * + */ + x = {create_real::eval(1), create_real::eval(-10000), + create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(-10000)}; + GPU_PREFIX(Malloc)((void**)&x_device, x.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(x_device, x.data(), x.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 7 + * y = * + * 8 + * * + */ + y = {create_real::eval(7), create_real::eval(-10000), + create_real::eval(8), create_real::eval(-10000)}; + GPU_PREFIX(Malloc)((void**)&y_device, y.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(y_device, y.data(), y.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(2); + + A_result.resize(A.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(x_device); + GPU_PREFIX(Free)(y_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, x, y, A_result; + T* A_device; + T* x_device; + T* y_device; + T alpha; + BLAS_PREFIX(blasHandle_t) 
handle; + using value_type = T; +}; + + + +typedef Types GemvValueTypes; + +TYPED_TEST_CASE(GerTest, GemvValueTypes); + +TYPED_TEST(GerTest, NON_SQUARED) { + BLAS_PREFIX(blasStatus_t) status = + call_ger(this->handle, 3, 2, &(this->alpha), this->x_device, 2, this->y_device, 2, this->A_device, 4); + EXPECT_TRUE(status == GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(Memcpy)(this->A_result.data(), this->A_device, this->A_result.size() * sizeof(typename TestFixture::value_type), GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[0]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[1]), 30.); + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[2]), 45.); + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[4]), 20.); + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[5]), 37.); + EXPECT_DOUBLE_EQ(get_real_double(this->A_result[6]), 54.); +} diff --git a/src/SDDK/GPU/hipblas_port/tests/main.cpp b/src/SDDK/GPU/hipblas_port/tests/main.cpp new file mode 100644 index 000000000..b95674848 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/main.cpp @@ -0,0 +1,8 @@ +#include "gtest/gtest.h" + +int main(int argc, char *argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/src/SDDK/GPU/hipblas_port/tests/trmm_test.cpp b/src/SDDK/GPU/hipblas_port/tests/trmm_test.cpp new file mode 100644 index 000000000..637f93268 --- /dev/null +++ b/src/SDDK/GPU/hipblas_port/tests/trmm_test.cpp @@ -0,0 +1,951 @@ +#ifdef __CUDA +#include +#include +#include +#else +#include +#include +#include +#include "hipblas_port.h" +#endif +#include +#include "gtest/gtest.h" +// #define CATCH_CONFIG_MAIN +// #include "catch.hpp" + +using testing::Types; +#ifdef __CUDA +#define GPU_PREFIX(val) cuda##val +#else +#define GPU_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define BLAS_PREFIX(val) cu##val +#else +#define BLAS_PREFIX(val) hip##val +#endif + +#ifdef __CUDA +#define GPU_PREFIX_CAPS(val) CU##val +#else +#define 
GPU_PREFIX_CAPS(val) HIP##val +#endif + +template +struct create_real +{ + template + static inline T eval(const U& val) + { + return T(val); + } +}; + +template <> +struct create_real +{ + template + static inline BLAS_PREFIX(FloatComplex) eval(const U& val) + { + BLAS_PREFIX(FloatComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template <> +struct create_real +{ + template + static inline BLAS_PREFIX(DoubleComplex) eval(const U& val) + { + BLAS_PREFIX(DoubleComplex) c; + c.x = val; + c.y = 0; + return c; + } +}; + +template +struct create_complex +{ + template + static inline T eval(const U1& val1, const U2& val2) + { + T c; + c.x = val1; + c.y = val2; + return c; + } +}; +template +inline double get_real_double(const T& val) +{ + return double(val); +} + +template <> +inline double get_real_double(const BLAS_PREFIX(FloatComplex) & val) +{ + return double(val.x); +} + +template <> +inline double get_real_double(const BLAS_PREFIX(DoubleComplex) & val) +{ + return double(val.x); +} + +inline BLAS_PREFIX(blasStatus_t) + call_trmm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasSideMode_t) side, BLAS_PREFIX(blasFillMode_t) uplo, + BLAS_PREFIX(blasOperation_t) trans, BLAS_PREFIX(blasDiagType_t) diag, int m, int n, const float* alpha, + const float* A, int lda, const float* B, int ldb, float* C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasStrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#else + return BLAS_PREFIX(blas_port_Strmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + call_trmm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasSideMode_t) side, BLAS_PREFIX(blasFillMode_t) uplo, + BLAS_PREFIX(blasOperation_t) trans, BLAS_PREFIX(blasDiagType_t) diag, int m, int n, const double* alpha, + const double* A, int lda, const double* B, int ldb, double* C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasDtrmm)(handle, side, uplo, trans, diag, m, 
n, alpha, A, lda, B, ldb, C, ldc); +#else + return BLAS_PREFIX(blas_port_Dtrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + call_trmm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasSideMode_t) side, BLAS_PREFIX(blasFillMode_t) uplo, + BLAS_PREFIX(blasOperation_t) trans, BLAS_PREFIX(blasDiagType_t) diag, int m, int n, + const BLAS_PREFIX(FloatComplex) * alpha, const BLAS_PREFIX(FloatComplex) * A, int lda, + const BLAS_PREFIX(FloatComplex) * B, int ldb, BLAS_PREFIX(FloatComplex) * C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasCtrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#else + return BLAS_PREFIX(blas_port_Ctrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#endif +} + +inline BLAS_PREFIX(blasStatus_t) + call_trmm(BLAS_PREFIX(blasHandle_t) handle, BLAS_PREFIX(blasSideMode_t) side, BLAS_PREFIX(blasFillMode_t) uplo, + BLAS_PREFIX(blasOperation_t) trans, BLAS_PREFIX(blasDiagType_t) diag, int m, int n, + const BLAS_PREFIX(DoubleComplex) * alpha, const BLAS_PREFIX(DoubleComplex) * A, int lda, + const BLAS_PREFIX(DoubleComplex) * B, int ldb, BLAS_PREFIX(DoubleComplex) * C, int ldc) +{ +#ifdef __CUDA + return BLAS_PREFIX(blasZtrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#else + return BLAS_PREFIX(blas_port_Ztrmm)(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc); +#endif +} + +template +class TrmmRealTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 + * A = 2 4 + * * * + */ + A = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * B = 2 4 + * * * + */ + B = {create_real::eval(1), 
create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 + * C = 2 4 + * * * + */ + C = {create_real::eval(1), create_real::eval(2), create_real::eval(-10000), + create_real::eval(3), create_real::eval(4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +// typedef Types TrmmRealTypes; +typedef Types TrmmRealTypes; + +TYPED_TEST_CASE(TrmmRealTest, TrmmRealTypes); + +TYPED_TEST(TrmmRealTest, LEFT_FULL_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 
15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 22.); +} + +TYPED_TEST(TrmmRealTest, LEFT_LOWER_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 22.); +} + +TYPED_TEST(TrmmRealTest, LEFT_UPPER_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 8.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 16.); +} + +TYPED_TEST(TrmmRealTest, LEFT_UPPER_U_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, 
GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 4.); +} + +TYPED_TEST(TrmmRealTest, LEFT_LOWER_U_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 4.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 10.); +} + +TYPED_TEST(TrmmRealTest, LEFT_UPPER_U_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 
3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 5.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 13.); +} + +TYPED_TEST(TrmmRealTest, LEFT_LOWER_U_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 5.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 4.); +} + +TYPED_TEST(TrmmRealTest, LEFT_UPPER_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), 
this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 25.); +} + +TYPED_TEST(TrmmRealTest, LEFT_LOWER_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 5.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 8.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 16.); +} + +TYPED_TEST(TrmmRealTest, LEFT_FULL_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 5.); + 
EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 11.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 25.); +} + +/* + * RIGHT SIDE + */ +TYPED_TEST(TrmmRealTest, RIGHT_FULL_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 22.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_LOWER_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 12.); + 
EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 16.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_UPPER_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 15.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 22.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_UPPER_U_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 6.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 10.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_LOWER_U_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, 
GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 7.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 4.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_UPPER_U_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 3.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 4.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_LOWER_U_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, 
this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 5.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 8.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_UPPER_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 12.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 16.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_LOWER_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_LOWER), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) 
+ (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 2.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 20.); +} + +TYPED_TEST(TrmmRealTest, RIGHT_FULL_NU_T) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_T), GPU_PREFIX_CAPS(BLAS_DIAG_UNIT), 2, 2, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 10.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[1]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[3]), 14.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[4]), 20.); +} + +/************************** + * Complex only + **************************/ + +template +class TrmmComplexLeftTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 3 + * A = 2 4 + * * * + */ + A = {create_complex::eval(1, 1), create_complex::eval(2, 2), create_real::eval(-10000), + create_complex::eval(3, 3), create_complex::eval(4, 4), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 5 + * B = 2 4 6 + * * * * + */ + B = {create_complex::eval(1, 1), create_complex::eval(2, 2), create_real::eval(-10000), + create_complex::eval(3, 
3), create_complex::eval(4, 4), create_real::eval(-20000), + create_complex::eval(5, 5), create_complex::eval(6, 6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* * * * + * C = * * * + * * * * + */ + C = {create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-10000), + create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-20000), + create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +typedef Types TrmmComplexTypes; + +TYPED_TEST_CASE(TrmmComplexLeftTest, TrmmComplexTypes); + +TYPED_TEST(TrmmComplexLeftTest, COMPLEX_LEFT_FULL_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 3, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, 0.); + 
EXPECT_DOUBLE_EQ((this->C_result[0]).y, 14.); + + EXPECT_DOUBLE_EQ((this->C_result[1]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 20.); + + EXPECT_DOUBLE_EQ((this->C_result[3]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 30.); + + EXPECT_DOUBLE_EQ((this->C_result[4]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 44.); + + EXPECT_DOUBLE_EQ((this->C_result[6]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[6]).y, 46.); + + EXPECT_DOUBLE_EQ((this->C_result[7]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[7]).y, 68.); +} + +TYPED_TEST(TrmmComplexLeftTest, COMPLEX_LEFT_FULL_C) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_LEFT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_C), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 3, &(this->alpha), + this->A_device, 3, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, 10.); + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[1]).x, 22.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[3]).x, 22.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[4]).x, 50.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[6]).x, 34.); + EXPECT_DOUBLE_EQ((this->C_result[6]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[7]).x, 78.); + EXPECT_DOUBLE_EQ((this->C_result[7]).y, 0.); +} + +template +class TrmmComplexRightTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* 1 4 7 + * A = 2 5 8 + * 3 6 9 + * * * * + */ + A = { + create_complex::eval(1, 1), 
create_complex::eval(2, 2), create_complex::eval(3, 3), + create_real::eval(-10000), create_complex::eval(4, 4), create_complex::eval(5, 5), + create_complex::eval(6, 6), create_real::eval(-20000), create_complex::eval(7, 7), + create_complex::eval(8, 8), create_complex::eval(9, 9), create_real::eval(-30000), + }; + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* 1 3 5 + * B = 2 4 6 + * * * * + */ + B = {create_complex::eval(1, 1), create_complex::eval(2, 2), create_real::eval(-10000), + create_complex::eval(3, 3), create_complex::eval(4, 4), create_real::eval(-20000), + create_complex::eval(5, 5), create_complex::eval(6, 6), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + /* * * * + * C = * * * + * * * * + */ + C = {create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-10000), + create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-20000), + create_real::eval(-1000), create_real::eval(-1000), create_real::eval(-20000)}; + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +TYPED_TEST_CASE(TrmmComplexRightTest, TrmmComplexTypes); + +TYPED_TEST(TrmmComplexRightTest, COMPLEX_RIGHT_FULL_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, 
GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 3, &(this->alpha), + this->A_device, 4, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 44.); + + EXPECT_DOUBLE_EQ((this->C_result[1]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 56.); + + EXPECT_DOUBLE_EQ((this->C_result[3]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 98.); + + EXPECT_DOUBLE_EQ((this->C_result[4]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 128.); + + EXPECT_DOUBLE_EQ((this->C_result[6]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[6]).y, 152.); + + EXPECT_DOUBLE_EQ((this->C_result[7]).x, 0.); + EXPECT_DOUBLE_EQ((this->C_result[7]).y, 200.); +} + +TYPED_TEST(TrmmComplexRightTest, COMPLEX_RIGHT_FULL_C) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_FULL), + GPU_PREFIX_CAPS(BLAS_OP_C), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), 2, 3, &(this->alpha), + this->A_device, 4, this->B_device, 3, this->C_device, 3); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ((this->C_result[0]).x, 96.); + EXPECT_DOUBLE_EQ((this->C_result[0]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[1]).x, 120.); + EXPECT_DOUBLE_EQ((this->C_result[1]).y, 0.); + + 
EXPECT_DOUBLE_EQ((this->C_result[3]).x, 114.); + EXPECT_DOUBLE_EQ((this->C_result[3]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[4]).x, 144.); + EXPECT_DOUBLE_EQ((this->C_result[4]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[6]).x, 132.); + EXPECT_DOUBLE_EQ((this->C_result[6]).y, 0.); + + EXPECT_DOUBLE_EQ((this->C_result[7]).x, 168.); + EXPECT_DOUBLE_EQ((this->C_result[7]).y, 0.); +} + +template +class TrmmSiriusTest : public ::testing::Test +{ + protected: + void SetUp() override + { + /* + * B * A + */ + m = 412; + n = 15; + ldb = m; + lda = 60; + size_B = ldb * n; + size_A = n * lda; + + A.resize(size_A, create_real::eval(-1000)); + B.resize(size_B, create_real::eval(-1000)); + C.resize(size_B, create_real::eval(-1000)); + + { + int i = 1; + for (int col = 0; col < n; ++col) { + for (int row = 0; row < n; ++row) { + if (col >= row) + A[row + col * lda] = create_real::eval(i); + + ++i; + } + } + } + + for (int i = 0; i < size_B; ++i) { + B[i] = create_real::eval(i + 1); + } + + GPU_PREFIX(Malloc)((void**)&A_device, A.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(A_device, A.data(), A.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + GPU_PREFIX(Malloc)((void**)&B_device, B.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(B_device, B.data(), B.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + GPU_PREFIX(Malloc)((void**)&C_device, C.size() * sizeof(T)); + GPU_PREFIX(Memcpy)(C_device, C.data(), C.size() * sizeof(T), GPU_PREFIX(MemcpyHostToDevice)); + + BLAS_PREFIX(blasCreate)(&handle); + + alpha = create_real::eval(1); + + C_result.resize(C.size()); + } + + void TearDown() override + { + GPU_PREFIX(Free)(A_device); + GPU_PREFIX(Free)(B_device); + GPU_PREFIX(Free)(C_device); + BLAS_PREFIX(blasDestroy)(handle); + } + + int m, n, ldb, lda, size_B, size_A; + std::vector A, B, C, C_result; + T* A_device; + T* B_device; + T* C_device; + T alpha; + BLAS_PREFIX(blasHandle_t) handle; + using value_type = T; +}; + +TYPED_TEST_CASE(TrmmSiriusTest, TrmmRealTypes); + 
+TYPED_TEST(TrmmSiriusTest, LEFT_LOWER_NU_N) +{ + BLAS_PREFIX(blasStatus_t) + status = call_trmm(this->handle, GPU_PREFIX_CAPS(BLAS_SIDE_RIGHT), GPU_PREFIX_CAPS(BLAS_FILL_MODE_UPPER), + GPU_PREFIX_CAPS(BLAS_OP_N), GPU_PREFIX_CAPS(BLAS_DIAG_NON_UNIT), this->m, this->n, + &(this->alpha), this->A_device, this->lda, this->B_device, this->ldb, this->C_device, this->ldb); + ASSERT_EQ(status, GPU_PREFIX_CAPS(BLAS_STATUS_SUCCESS)); + GPU_PREFIX(DeviceSynchronize()); + ASSERT_EQ(GPU_PREFIX(GetLastError)(), GPU_PREFIX(Success)); + GPU_PREFIX(Memcpy) + (this->C_result.data(), this->C_device, this->C_result.size() * sizeof(typename TestFixture::value_type), + GPU_PREFIX(MemcpyDeviceToHost)); + + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[0]), 1.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[411]), 412.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[412]), 7037.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[413]), 7070.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[414]), 7103.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[824]), 40472.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[825]), 40568.); + EXPECT_DOUBLE_EQ(get_real_double(this->C_result[826]), 40664.); +} + From d4dd82f5b624666b463c93595e7c6e9a24c55e0e Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Mon, 11 Mar 2019 15:46:58 +0100 Subject: [PATCH 18/28] changed output directory for hip object files set in CMake --- cmake/modules/FindROCM.cmake | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake index e5072af27..5bbde5352 100644 --- a/cmake/modules/FindROCM.cmake +++ b/cmake/modules/FindROCM.cmake @@ -357,6 +357,7 @@ macro(rocm_hip_add_library) endif() set(_ROCM_SOURCES ${_ROCM_SOURCES} ${source}) endforeach() + get_filename_component(HIP_LIB_OUTPUT_DIR ${HIP_LIB_OUTPUT_DIR} ABSOLUTE) # generate flags to use set(_ROCM_STD_FLAGS ${HIP_LIB_FLAGS} ${ROCM_HIPCC_FLAGS}) @@ -377,28 
+378,40 @@ macro(rocm_hip_add_library) # create imported shared library if(HIP_LIB_SHARED) set(_ROCM_FLAGS ${_ROCM_FLAGS} -fPIC) - add_library(${HIP_LIB_NAME} SHARED IMPORTED GLOBAL) - set_target_properties(${HIP_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so) endif() # compile all files to .o set(_ROCM_OBJS) set(_ROCM_OBJ_TARGETS) foreach(_rocm_file IN LISTS _ROCM_SOURCES) + + # create output directory for .o file + get_filename_component(_ROCM_CURRENT_DIR ${_rocm_file} DIRECTORY) + file(RELATIVE_PATH _ROCM_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ${_ROCM_CURRENT_DIR}) + set(_ROCM_OBJ_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${HIP_LIB_NAME}.dir/${_ROCM_CURRENT_DIR}") + file(MAKE_DIRECTORY ${_ROCM_OBJ_OUT_DIR}) + + # set .o name and path get_filename_component(_ROCM_FILE_NAME_ONLY ${_rocm_file} NAME) - set(_ROCM_OBJ_FILE ${HIP_LIB_OUTPUT_DIR}/${_ROCM_FILE_NAME_ONLY}.o) + set(_ROCM_OBJ_FILE ${_ROCM_OBJ_OUT_DIR}/${_ROCM_FILE_NAME_ONLY}.o) list(APPEND _ROCM_OBJS ${_ROCM_OBJ_FILE}) list(APPEND _ROCM_OBJ_TARGETS HIP_TARGET_${_ROCM_FILE_NAME_ONLY}) + + # compile .o file add_custom_target(HIP_TARGET_${_ROCM_FILE_NAME_ONLY} COMMAND ${ROCM_HIPCC_EXECUTABLE} -c ${_rocm_file} -o ${_ROCM_OBJ_FILE} ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} - WORKING_DIRECTORY ${HIP_LIB_OUTPUT_DIR} SOURCES ${_rocm_file}) + WORKING_DIRECTORY ${_ROCM_OBJ_OUT_DIR} SOURCES ${_rocm_file}) endforeach() # compile shared library if(HIP_LIB_SHARED) - add_custom_target(HIP_TARGET_${HIP_LIB_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} + add_custom_target(HIP_TARGET_${HIP_LIB_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so + ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} WORKING_DIRECTORY ${HIP_LIB_OUTPUT_DIR}) + add_library(${HIP_LIB_NAME} INTERFACE) + 
target_link_libraries(${HIP_LIB_NAME} INTERFACE ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so) + # add depencies add_dependencies(${HIP_LIB_NAME} HIP_TARGET_${HIP_LIB_NAME}) foreach(_rocm_target IN LISTS _ROCM_OBJ_TARGETS) From 56f5ad22d8046c8af4e3a36f746e4a3d15c2ca62 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Mon, 11 Mar 2019 15:48:52 +0100 Subject: [PATCH 19/28] added full hipblas interace functionality --- src/SDDK/GPU/hipblas_interface.hpp | 197 ++++++++++++++--------------- 1 file changed, 96 insertions(+), 101 deletions(-) diff --git a/src/SDDK/GPU/hipblas_interface.hpp b/src/SDDK/GPU/hipblas_interface.hpp index d1da39e15..2a8090816 100644 --- a/src/SDDK/GPU/hipblas_interface.hpp +++ b/src/SDDK/GPU/hipblas_interface.hpp @@ -28,9 +28,11 @@ #include #include #include +#include #include #include "acc.hpp" #include "hipblas_port.h" +// #include "blas_lapack.h" namespace hipblas { @@ -136,34 +138,33 @@ inline hipblasDiagType_t get_hipblasDiagType_t(char c) } #ifdef NDEBUG -#define CALL_HIPBLAS(func__, args__) \ -{ \ - hipblasStatus_t status; \ - if ((status = func__ args__) != HIPBLAS_STATUS_SUCCESS) { \ - error_message(status); \ - char nm[1024]; \ - gethostname(nm, 1024); \ - printf("hostname: %s\n", nm); \ - printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ - stack_backtrace(); \ - } \ -} +#define CALL_HIPBLAS(func__, args__) \ + { \ + hipblasStatus_t status; \ + if ((status = func__ args__) != HIPBLAS_STATUS_SUCCESS) { \ + error_message(status); \ + char nm[1024]; \ + gethostname(nm, 1024); \ + printf("hostname: %s\n", nm); \ + printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ + stack_backtrace(); \ + } \ + hipDeviceSynchronize(); \ + } #else -#define CALL_HIPBLAS(func__, args__) \ -{ \ - hipblasStatus_t status; \ - func__ args__; \ - hipDeviceSynchronize(); \ - status = hipblasGetError(); \ - if (status != HIPBLAS_STATUS_SUCCESS) { \ - error_message(status); \ - char nm[1024]; \ - gethostname(nm, 
1024); \ - printf("hostname: %s\n", nm); \ - printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ - stack_backtrace(); \ - } \ -} +#define CALL_HIPBLAS(func__, args__) \ + { \ + hipblasStatus_t status; \ + if ((status = func__ args__) != HIPBLAS_STATUS_SUCCESS) { \ + error_message(status); \ + char nm[1024]; \ + gethostname(nm, 1024); \ + printf("hostname: %s\n", nm); \ + printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \ + stack_backtrace(); \ + } \ + hipDeviceSynchronize(); \ + } #endif /// Store the default (null) stream handler. @@ -200,109 +201,103 @@ inline void destroy_stream_handles() } } -inline hipblasHandle_t stream_handle(int id__) +inline hipblasHandle_t stream_handle(int id) { - return (id__ == -1) ? null_stream_handle() : stream_handles()[id__]; + return (id == -1) ? null_stream_handle() : stream_handles()[id]; } -inline void zgemv(char transa, int32_t m, int32_t n, hipDoubleComplex* alpha, hipDoubleComplex* a, int32_t lda, - hipDoubleComplex* x, int32_t incx, hipDoubleComplex* beta, hipDoubleComplex* y, int32_t incy, int stream_id) +inline void zgemv(char transa, int32_t m, int32_t n, hipDoubleComplex* alpha, hipDoubleComplex* a, int32_t lda, + hipDoubleComplex* x, int32_t incx, hipDoubleComplex* beta, hipDoubleComplex* y, int32_t incy, + int stream_id) { - // CALL_HIPBLAS(hipblasZgemv, (stream_handle(stream_id), get_hipblasOperation_t(transa), m, n, alpha, a, lda, x, incx, beta, y, incy)); - throw std::runtime_error("zgemv not implemented in hipblas with ROCM!"); + CALL_HIPBLAS(hipblas_port_Zgemv, (stream_handle(stream_id), get_hipblasOperation_t(transa), m, n, alpha, a, lda, x, + incx, beta, y, incy)); } -inline void zgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, - hipDoubleComplex const* alpha, hipDoubleComplex const* a, int32_t lda, hipDoubleComplex const* b, - int32_t ldb, hipDoubleComplex const* beta, hipDoubleComplex* c, int32_t ldc, int stream_id) +inline void zgemm(char 
transa, char transb, int32_t m, int32_t n, int32_t k, hipDoubleComplex const* alpha, + hipDoubleComplex const* a, int32_t lda, hipDoubleComplex const* b, int32_t ldb, + hipDoubleComplex const* beta, hipDoubleComplex* c, int32_t ldc, int stream_id) { - // CALL_HIPBLAS(hipblasZgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), get_hipblasOperation_t(transb), - // m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); - throw std::runtime_error("zgemm not implemented in hipblas with ROCM!"); + CALL_HIPBLAS(hipblas_port_Zgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), + get_hipblasOperation_t(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -inline void dgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, - double const* alpha, double const* a, int32_t lda, double const* b, - int32_t ldb, double const* beta, double* c, int32_t ldc, int stream_id) +inline void dgemm(char transa, char transb, int32_t m, int32_t n, int32_t k, double const* alpha, double const* a, + int32_t lda, double const* b, int32_t ldb, double const* beta, double* c, int32_t ldc, int stream_id) { - CALL_HIPBLAS(hipblasDgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), get_hipblasOperation_t(transb), - m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + CALL_HIPBLAS(hipblasDgemm, (stream_handle(stream_id), get_hipblasOperation_t(transa), + get_hipblasOperation_t(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -inline void dtrmm(char side__, char uplo__, char transa__, char diag__, int m__, int n__, - double const* alpha__, double const* A__, int lda__, double* B__, int ldb__) +inline void dtrmm(char side, char uplo, char transa, char diag, int m, int n, double const* alpha, + double const* A, int lda, double* B, int ldb) { - // throw std::runtime_error("dtrmm not implemented in hipblas with ROCM!"); - hipblasSideMode_t side = get_hipblasSideMode_t(side__); - hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); - hipblasOperation_t 
transa = get_hipblasOperation_t(transa__); - hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); - CALL_HIPBLAS(hipblas_port_Dtrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, - B__, ldb__, B__, ldb__)); + hipblasSideMode_t side_gpu = get_hipblasSideMode_t(side); + hipblasFillMode_t uplo_gpu = get_hipblasFillMode_t(uplo); + hipblasOperation_t transa_gpu = get_hipblasOperation_t(transa); + hipblasDiagType_t diag_gpu = get_hipblasDiagType_t(diag); + CALL_HIPBLAS(hipblas_port_Dtrmm, + (null_stream_handle(), side_gpu, uplo_gpu, transa_gpu, diag_gpu, m, n, alpha, A, lda, B, ldb, B, ldb)); } -inline void ztrmm(char side__, - char uplo__, - char transa__, - char diag__, - int m__, - int n__, - hipDoubleComplex const* alpha__, - hipDoubleComplex const* A__, - int lda__, - hipDoubleComplex* B__, - int ldb__) +inline void ztrmm(char side, char uplo, char transa, char diag, int m, int n, + hipDoubleComplex const* alpha, hipDoubleComplex const* A, int lda, hipDoubleComplex* B, + int ldb) { - // throw std::runtime_error("ztrmm not implemented in hipblas with ROCM!"); - hipblasSideMode_t side = get_hipblasSideMode_t(side__); - hipblasFillMode_t uplo = get_hipblasFillMode_t(uplo__); - hipblasOperation_t transa = get_hipblasOperation_t(transa__); - hipblasDiagType_t diag = get_hipblasDiagType_t(diag__); - CALL_HIPBLAS(hipblas_port_Ztrmm, (null_stream_handle(), side, uplo, transa, diag, m__, n__, alpha__, A__, lda__, - B__, ldb__, B__, ldb__)); + hipblasSideMode_t side_gpu = get_hipblasSideMode_t(side); + hipblasFillMode_t uplo_gpu = get_hipblasFillMode_t(uplo); + hipblasOperation_t transa_gpu = get_hipblasOperation_t(transa); + hipblasDiagType_t diag_gpu = get_hipblasDiagType_t(diag); + CALL_HIPBLAS(hipblas_port_Ztrmm, + (null_stream_handle(), side_gpu, uplo_gpu, transa_gpu, diag_gpu, m, n, alpha, A, lda, B, ldb, B, ldb)); + + // copy to host, calculate, copy back + // int size_A, size_B; + // size_B = n * ldb; + // if (side == 'l' || side 
== 'L') { + // if (transa == 'n' || transa == 'N') + // size_A = m * lda; + // else + // size_A = n * lda; + // } else { + // if (transa == 'n' || transa == 'N') + // size_A = n * lda; + // else + // size_A = m * lda; + // } + // std::vector A_host(size_A); + // std::vector B_host(size_B); + // acc::copyout(A_host.data(), A, A_host.size()); + // acc::copyout(B_host.data(), B, B_host.size()); + // ftn_int mf = m; + // ftn_int nf = n; + // ftn_int ldaf = lda; + // ftn_int ldbf = ldb; + // FORTRAN(dtrmm) + // (&side, &uplo, &transa, "N", &mf, &nf, const_cast((const ftn_double*)alpha), + // ((ftn_double*)A_host.data()), &ldaf, ((ftn_double*)B_host.data()), &ldbf, (ftn_len)1, + // (ftn_len)1, (ftn_len)1, (ftn_len)1); + // acc::copyin(const_cast(B), B_host.data(), B_host.size()); } -inline void dger(int m, - int n, - double const* alpha, - double const* x, - int incx, - double const* y, - int incy, - double* A, - int lda, - int stream_id) +inline void dger(int m, int n, double const* alpha, double const* x, int incx, double const* y, int incy, double* A, + int lda, int stream_id) { CALL_HIPBLAS(hipblasDger, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); } -inline void zgeru(int m, - int n, - hipDoubleComplex const* alpha, - hipDoubleComplex const* x, - int incx, - hipDoubleComplex const* y, - int incy, - hipDoubleComplex* A, - int lda, - int stream_id) +inline void zgeru(int m, int n, hipDoubleComplex const* alpha, hipDoubleComplex const* x, int incx, + hipDoubleComplex const* y, int incy, hipDoubleComplex* A, int lda, int stream_id) { - // throw std::runtime_error("zgeru not implemented in hipblas with ROCM!"); CALL_HIPBLAS(hipblas_port_Zgeru, (stream_handle(stream_id), m, n, alpha, x, incx, y, incy, A, lda)); } -inline void zaxpy(int n__, - hipDoubleComplex const* alpha__, - hipDoubleComplex const* x__, - int incx__, - hipDoubleComplex* y__, - int incy__) +inline void zaxpy(int n, hipDoubleComplex const* alpha, hipDoubleComplex const* x, int incx, 
+ hipDoubleComplex* y, int incy) { - // throw std::runtime_error("zaxpy not implemented in hipblas with ROCM!"); - CALL_HIPBLAS(hipblas_port_Zaxpy, (null_stream_handle(), n__, alpha__, x__, incx__, y__, incy__)); + CALL_HIPBLAS(hipblas_port_Zaxpy, (null_stream_handle(), n, alpha, x, incx, y, incy)); } - } // namespace hipblas #endif From 04c548ee775843cebbf6ae1c5e4f7ec110ad69a6 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 14 Mar 2019 15:42:14 +0100 Subject: [PATCH 20/28] removed depency on HIP headers for cuda builds --- CMakeLists.txt | 3 - cmake/modules/FindROCM.cmake | 2 +- src/Band/diag_pseudo_potential.hpp | 8 +- src/Band/residuals.hpp | 7 +- src/Hamiltonian/local_operator.hpp | 15 +- src/Kernels/add_pw_ekin.cu | 21 +- src/Kernels/create_beta_gk.cu | 17 +- src/Kernels/cuda_uspp_kernels.cu | 105 +- src/Kernels/density_rg.cu | 23 +- src/Kernels/generate_dm_pw.cu | 17 +- src/Kernels/generate_phase_factors.cu | 11 +- src/Kernels/mul_by_veff.cu | 42 +- src/Kernels/mul_veff_with_phase_factors.cu | 21 +- src/Kernels/random.cu | 2 +- src/Kernels/residuals_aux.cu | 55 +- src/Kernels/spline.cu | 7 +- src/Kernels/sum_q_pw_dm_pw.cu | 16 +- src/Potential/xc_functional_base.hpp | 12 +- src/SDDK/GPU/acc.hpp | 72 +- src/SDDK/GPU/acc_runtime.hpp | 75 + src/SDDK/GPU/checksum.cu | 25 +- src/SDDK/GPU/cuda_common.hpp | 3 +- src/SDDK/GPU/fft_kernels.cu | 134 +- src/SDDK/GPU/gpublas_interface.hpp | 1 - src/SDDK/GPU/scale_matrix.cu | 31 +- src/SDDK/fft3d.hpp | 2 - src/SDDK/linalg.hpp | 36 +- src/SDDK/matrix_storage.hpp | 3 +- src/utils/amd_hip/hip/channel_descriptor.h | 39 - src/utils/amd_hip/hip/device_functions.h | 36 - src/utils/amd_hip/hip/driver_types.h | 36 - .../hip/hcc_detail/channel_descriptor.h | 346 - .../hip/hcc_detail/code_object_bundle.hpp | 139 - src/utils/amd_hip/hip/hcc_detail/concepts.hpp | 30 - src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h | 1 - .../hip/hcc_detail/cuda/math_functions.h | 1 - .../amd_hip/hip/hcc_detail/device_functions.h | 1078 -- 
.../hip/hcc_detail/device_library_decls.h | 118 - .../amd_hip/hip/hcc_detail/driver_types.h | 314 - .../hip/hcc_detail/functional_grid_launch.hpp | 158 - .../amd_hip/hip/hcc_detail/grid_launch.h | 69 - .../amd_hip/hip/hcc_detail/grid_launch.hpp | 50 - .../hip/hcc_detail/grid_launch_GGL.hpp | 30 - src/utils/amd_hip/hip/hcc_detail/helpers.hpp | 110 - src/utils/amd_hip/hip/hcc_detail/hip_atomic.h | 269 - .../amd_hip/hip/hcc_detail/hip_complex.h | 356 - src/utils/amd_hip/hip/hcc_detail/hip_db.h | 21 - src/utils/amd_hip/hip/hcc_detail/hip_fp16.h | 1645 --- .../amd_hip/hip/hcc_detail/hip_fp16_gcc.h | 257 - .../hip/hcc_detail/hip_fp16_math_fwd.h | 82 - src/utils/amd_hip/hip/hcc_detail/hip_ldg.h | 103 - src/utils/amd_hip/hip/hcc_detail/hip_memory.h | 114 - .../amd_hip/hip/hcc_detail/hip_prof_api.h | 204 - .../amd_hip/hip/hcc_detail/hip_prof_str.h | 2512 ---- .../amd_hip/hip/hcc_detail/hip_runtime.h | 481 - .../amd_hip/hip/hcc_detail/hip_runtime_api.h | 2860 ---- .../hip/hcc_detail/hip_surface_types.h | 54 - .../hip/hcc_detail/hip_texture_types.h | 77 - .../amd_hip/hip/hcc_detail/hip_vector_types.h | 880 -- .../amd_hip/hip/hcc_detail/host_defines.h | 95 - .../amd_hip/hip/hcc_detail/llvm_intrinsics.h | 70 - .../hcc_detail/macro_based_grid_launch.hpp | 798 -- .../amd_hip/hip/hcc_detail/math_functions.h | 1501 --- src/utils/amd_hip/hip/hcc_detail/math_fwd.h | 706 - .../amd_hip/hip/hcc_detail/program_state.hpp | 108 - .../hip/hcc_detail/surface_functions.h | 59 - .../hip/hcc_detail/texture_functions.h | 11102 ---------------- .../amd_hip/hip/hcc_detail/texture_types.h | 107 - src/utils/amd_hip/hip/hip_common.h | 79 - src/utils/amd_hip/hip/hip_complex.h | 36 - src/utils/amd_hip/hip/hip_fp16.h | 36 - src/utils/amd_hip/hip/hip_hcc.h | 105 - src/utils/amd_hip/hip/hip_profile.h | 42 - src/utils/amd_hip/hip/hip_runtime.h | 67 - src/utils/amd_hip/hip/hip_runtime_api.h | 342 - src/utils/amd_hip/hip/hip_texture_types.h | 36 - src/utils/amd_hip/hip/hip_vector_types.h | 41 - 
src/utils/amd_hip/hip/math_functions.h | 40 - .../hip/nvcc_detail/channel_descriptor.h | 28 - .../amd_hip/hip/nvcc_detail/hip_complex.h | 119 - .../amd_hip/hip/nvcc_detail/hip_runtime.h | 126 - .../amd_hip/hip/nvcc_detail/hip_runtime_api.h | 1286 -- .../hip/nvcc_detail/hip_texture_types.h | 6 - src/utils/amd_hip/hip/texture_types.h | 36 - 84 files changed, 428 insertions(+), 29779 deletions(-) create mode 100644 src/SDDK/GPU/acc_runtime.hpp delete mode 100644 src/utils/amd_hip/hip/channel_descriptor.h delete mode 100644 src/utils/amd_hip/hip/device_functions.h delete mode 100644 src/utils/amd_hip/hip/driver_types.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/concepts.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/device_functions.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/device_library_decls.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/driver_types.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/helpers.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_atomic.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_complex.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_db.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_ldg.h delete 
mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_memory.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_runtime.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/host_defines.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/math_functions.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/math_fwd.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/program_state.hpp delete mode 100644 src/utils/amd_hip/hip/hcc_detail/surface_functions.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/texture_functions.h delete mode 100644 src/utils/amd_hip/hip/hcc_detail/texture_types.h delete mode 100644 src/utils/amd_hip/hip/hip_common.h delete mode 100644 src/utils/amd_hip/hip/hip_complex.h delete mode 100644 src/utils/amd_hip/hip/hip_fp16.h delete mode 100644 src/utils/amd_hip/hip/hip_hcc.h delete mode 100644 src/utils/amd_hip/hip/hip_profile.h delete mode 100644 src/utils/amd_hip/hip/hip_runtime.h delete mode 100644 src/utils/amd_hip/hip/hip_runtime_api.h delete mode 100644 src/utils/amd_hip/hip/hip_texture_types.h delete mode 100644 src/utils/amd_hip/hip/hip_vector_types.h delete mode 100644 src/utils/amd_hip/hip/math_functions.h delete mode 100644 src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h delete mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_complex.h delete mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h delete mode 100644 
src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h delete mode 100644 src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h delete mode 100644 src/utils/amd_hip/hip/texture_types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a16b0af5e..9382bc500 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,9 +122,6 @@ if(USE_CUDA) #list(APPEND CMAKE_CUDA_FLAGS "-arch=sm_61") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -arch=sm_61") endif() - # tell HIP to call cuda api - add_definitions("-D__HIP_PLATFORM_NVCC__") - include_directories(BEFORE ${PROJECT_SOURCE_DIR}/src/utils/amd_hip) endif(USE_CUDA) if(USE_MAGMA) diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake index 5bbde5352..f8d2f6b6b 100644 --- a/cmake/modules/FindROCM.cmake +++ b/cmake/modules/FindROCM.cmake @@ -405,7 +405,7 @@ macro(rocm_hip_add_library) # compile shared library if(HIP_LIB_SHARED) - add_custom_target(HIP_TARGET_${HIP_LIB_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so + add_custom_target(HIP_TARGET_${HIP_LIB_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${HIP_LIB_OUTPUT_DIR}/lib${HIP_LIB_NAME}.so ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} WORKING_DIRECTORY ${HIP_LIB_OUTPUT_DIR}) diff --git a/src/Band/diag_pseudo_potential.hpp b/src/Band/diag_pseudo_potential.hpp index a00ac9972..ba4e678cf 100644 --- a/src/Band/diag_pseudo_potential.hpp +++ b/src/Band/diag_pseudo_potential.hpp @@ -24,14 +24,14 @@ #if defined(__GPU) && defined(__CUDA) -#include +#include "../SDDK/GPU/acc.hpp" extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, double r, - hipDoubleComplex* phi0, - hipDoubleComplex* phi1, - hipDoubleComplex* phi2); + acc_complex_double_t* phi0, + acc_complex_double_t* phi1, + acc_complex_double_t* phi2); #endif template diff --git a/src/Band/residuals.hpp b/src/Band/residuals.hpp index cb0461e61..d6e59ed2c 100644 --- a/src/Band/residuals.hpp +++ 
b/src/Band/residuals.hpp @@ -193,15 +193,14 @@ static void normalize_res(device_t pu__, } case GPU: { #ifdef __GPU - scale_matrix_columns_gpu(res__.pw_coeffs(ispn).num_rows_loc(), - num_bands__, - res__.pw_coeffs(ispn).prime().at(memory_t::device), + scale_matrix_columns_gpu(res__.pw_coeffs(ispn).num_rows_loc(), num_bands__, + (acc_complex_double_t*)res__.pw_coeffs(ispn).prime().at(memory_t::device), p_norm__.at(memory_t::device)); if (res__.has_mt()) { scale_matrix_columns_gpu(res__.mt_coeffs(ispn).num_rows_loc(), num_bands__, - res__.mt_coeffs(ispn).prime().at(memory_t::device), + (acc_complex_double_t *)res__.mt_coeffs(ispn).prime().at(memory_t::device), p_norm__.at(memory_t::device)); } #endif diff --git a/src/Hamiltonian/local_operator.hpp b/src/Hamiltonian/local_operator.hpp index 4a395ff84..6c3a65cee 100644 --- a/src/Hamiltonian/local_operator.hpp +++ b/src/Hamiltonian/local_operator.hpp @@ -26,6 +26,7 @@ #define __LOCAL_OPERATOR_HPP__ #include "Potential/potential.hpp" +#include "../SDDK/GPU/acc.hpp" #ifdef __GPU extern "C" void mul_by_veff_gpu(int ispn__, int size__, double* const* veff__, double_complex* buf__); @@ -881,7 +882,8 @@ class Local_operator acc::copy(buf_rg_.at(memory_t::device), fft_coarse_.buffer().at(memory_t::device), fft_coarse_.local_size()); } /* multiply by step function */ - scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, fft_coarse_.buffer().at(memory_t::device), + scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, + (acc_complex_double_t*)fft_coarse_.buffer().at(memory_t::device), theta_.f_rg().at(memory_t::device)); /* phi(r) * Theta(r) -> ophi(G) */ fft_coarse_.transform<-1>(ophi__->pw_coeffs(0).extra().at(memory_t::host, 0, j)); @@ -892,7 +894,8 @@ class Local_operator } if (hphi__ != nullptr) { /* multiply by effective potential */ - scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, fft_coarse_.buffer().at(memory_t::device), + scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, + 
(acc_complex_double_t*)fft_coarse_.buffer().at(memory_t::device), veff_vec_[0].f_rg().at(memory_t::device)); /* phi(r) * Theta(r) * V(r) -> hphi(G) */ fft_coarse_.transform<-1>(hphi__->pw_coeffs(0).extra().at(memory_t::host, 0, j)); @@ -928,7 +931,9 @@ class Local_operator case GPU: { #if defined(__GPU) /* multiply by step function */ - scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, fft_coarse_.buffer().at(memory_t::device), theta_.f_rg().at(memory_t::device)); + scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, + (acc_complex_double_t*)fft_coarse_.buffer().at(memory_t::device), + theta_.f_rg().at(memory_t::device)); #endif break; } @@ -1034,7 +1039,9 @@ class Local_operator /* phi(G) -> phi(r) */ fft_coarse_.transform<1>(phi__.pw_coeffs(0).extra().at(memory_t::host, 0, j)); /* multiply by Bz */ - scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, fft_coarse_.buffer().at(memory_t::device), veff_vec_[1].f_rg().at(memory_t::device)); + scale_matrix_rows_gpu(fft_coarse_.local_size(), 1, + (acc_complex_double_t*)fft_coarse_.buffer().at(memory_t::device), + veff_vec_[1].f_rg().at(memory_t::device)); /* phi(r) * Bz(r) -> bphi[0](G) */ fft_coarse_.transform<-1>(bphi__[0].pw_coeffs(0).extra().at(memory_t::host, 0, j)); #else diff --git a/src/Kernels/add_pw_ekin.cu b/src/Kernels/add_pw_ekin.cu index 1860f8da7..5ea3afeeb 100644 --- a/src/Kernels/add_pw_ekin.cu +++ b/src/Kernels/add_pw_ekin.cu @@ -23,21 +23,20 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void add_pw_ekin_gpu_kernel(int num_gvec__, double alpha__, double const* pw_ekin__, - hipDoubleComplex const* phi__, - hipDoubleComplex const* vphi__, - hipDoubleComplex* hphi__) + acc_complex_double_t const* phi__, + acc_complex_double_t const* vphi__, + acc_complex_double_t* hphi__) { int ig = blockIdx.x * blockDim.x + threadIdx.x; if (ig < num_gvec__) { - hipDoubleComplex z1 = hipCadd(vphi__[ig], 
make_hipDoubleComplex(alpha__ * pw_ekin__[ig] * phi__[ig].x, + acc_complex_double_t z1 = accCadd(vphi__[ig], make_accDoubleComplex(alpha__ * pw_ekin__[ig] * phi__[ig].x, alpha__ * pw_ekin__[ig] * phi__[ig].y)); - hphi__[ig] = hipCadd(hphi__[ig], z1); + hphi__[ig] = accCadd(hphi__[ig], z1); } } @@ -48,14 +47,14 @@ __global__ void add_pw_ekin_gpu_kernel(int num_gvec__, extern "C" void add_pw_ekin_gpu(int num_gvec__, double alpha__, double const* pw_ekin__, - hipDoubleComplex const* phi__, - hipDoubleComplex const* vphi__, - hipDoubleComplex* hphi__) + acc_complex_double_t const* phi__, + acc_complex_double_t const* vphi__, + acc_complex_double_t* hphi__) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_gvec__, grid_t.x)); - hipLaunchKernelGGL((add_pw_ekin_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((add_pw_ekin_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gvec__, alpha__, pw_ekin__, diff --git a/src/Kernels/create_beta_gk.cu b/src/Kernels/create_beta_gk.cu index ed01407d5..8863dfcb5 100644 --- a/src/Kernels/create_beta_gk.cu +++ b/src/Kernels/create_beta_gk.cu @@ -23,8 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" #ifdef __CUDA #include "../SDDK/GPU/cuda_timer.hpp" @@ -34,10 +33,10 @@ __global__ void create_beta_gk_gpu_kernel ( int num_gkvec__, int const* beta_desc__, - hipDoubleComplex const* beta_gk_t, + acc_complex_double_t const* beta_gk_t, double const* gkvec, double const* atom_pos, - hipDoubleComplex* beta_gk + acc_complex_double_t* beta_gk ) { int ia = blockIdx.y; @@ -59,8 +58,8 @@ __global__ void create_beta_gk_gpu_kernel for (int xi = 0; xi < nbf; xi++) { beta_gk[array2D_offset(igk, offset_beta_gk + xi, num_gkvec__)] = - hipCmul(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)], - make_hipDoubleComplex(cosp, -sinp)); + accCmul(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)], + 
make_accDoubleComplex(cosp, -sinp)); } } } @@ -68,10 +67,10 @@ __global__ void create_beta_gk_gpu_kernel extern "C" void create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, - hipDoubleComplex const* beta_gk_t, + acc_complex_double_t const* beta_gk_t, double const* gkvec, double const* atom_pos, - hipDoubleComplex* beta_gk) + acc_complex_double_t* beta_gk) { #ifdef __CUDA CUDA_timer t("create_beta_gk_gpu"); @@ -80,7 +79,7 @@ extern "C" void create_beta_gk_gpu(int num_atoms, dim3 grid_t(64); dim3 grid_b(num_blocks(num_gkvec, grid_t.x), num_atoms); - hipLaunchKernelGGL((create_beta_gk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((create_beta_gk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, beta_desc, beta_gk_t, diff --git a/src/Kernels/cuda_uspp_kernels.cu b/src/Kernels/cuda_uspp_kernels.cu index dff012510..9fa3eaee0 100644 --- a/src/Kernels/cuda_uspp_kernels.cu +++ b/src/Kernels/cuda_uspp_kernels.cu @@ -23,18 +23,17 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" -extern hipStream_t* streams; +extern acc_stream_t* streams; __global__ void compute_chebyshev_order1_gpu_kernel ( int num_gkvec__, double c__, double r__, - hipDoubleComplex* phi0__, - hipDoubleComplex* phi1__ + acc_complex_double_t* phi0__, + acc_complex_double_t* phi1__ ) { int igk = blockDim.x * blockIdx.x + threadIdx.x; @@ -44,11 +43,11 @@ __global__ void compute_chebyshev_order1_gpu_kernel { int i = array2D_offset(igk, j, num_gkvec__); // phi0 * c - hipDoubleComplex z1 = hipCmul(phi0__[i], make_hipDoubleComplex(c__, 0)); + acc_complex_double_t z1 = accCmul(phi0__[i], make_accDoubleComplex(c__, 0)); // phi1 - phi0 * c - hipDoubleComplex z2 = hipCsub(phi1__[i], z1); + acc_complex_double_t z2 = accCsub(phi1__[i], z1); // (phi1 - phi0 * c) / r - phi1__[i] = hipCdiv(z2, make_hipDoubleComplex(r__, 0)); + phi1__[i] = accCdiv(z2, make_accDoubleComplex(r__, 0)); } } 
@@ -57,9 +56,9 @@ __global__ void compute_chebyshev_orderk_gpu_kernel int num_gkvec__, double c__, double r__, - hipDoubleComplex* phi0__, - hipDoubleComplex* phi1__, - hipDoubleComplex* phi2__ + acc_complex_double_t* phi0__, + acc_complex_double_t* phi1__, + acc_complex_double_t* phi2__ ) { int igk = blockDim.x * blockIdx.x + threadIdx.x; @@ -69,13 +68,13 @@ __global__ void compute_chebyshev_orderk_gpu_kernel { int i = array2D_offset(igk, j, num_gkvec__); // phi1 * c - hipDoubleComplex z1 = hipCmul(phi1__[i], make_hipDoubleComplex(c__, 0)); + acc_complex_double_t z1 = accCmul(phi1__[i], make_accDoubleComplex(c__, 0)); // phi2 - phi1 * c - hipDoubleComplex z2 = hipCsub(phi2__[i], z1); + acc_complex_double_t z2 = accCsub(phi2__[i], z1); // (phi2 - phi1 * c) * 2 / r - hipDoubleComplex z3 = hipCmul(z2, make_hipDoubleComplex(2.0 / r__, 0)); + acc_complex_double_t z3 = accCmul(z2, make_accDoubleComplex(2.0 / r__, 0)); // (phi2 - phi1 * c) * 2 / r - phi0 - phi2__[i] = hipCsub(z3, phi0__[i]); + phi2__[i] = accCsub(z3, phi0__[i]); } } @@ -83,16 +82,16 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, int n, double c, double r, - hipDoubleComplex* phi0, - hipDoubleComplex* phi1, - hipDoubleComplex* phi2) + acc_complex_double_t* phi0, + acc_complex_double_t* phi1, + acc_complex_double_t* phi2) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_gkvec, grid_t.x), n); if (phi2 == NULL) { - hipLaunchKernelGGL((compute_chebyshev_order1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((compute_chebyshev_order1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, c, r, @@ -102,7 +101,7 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, } else { - hipLaunchKernelGGL((compute_chebyshev_orderk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((compute_chebyshev_orderk_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gkvec, c, r, @@ -122,9 +121,9 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== 
int* beta_t_idx, //== double* atom_pos, //== double* gkvec, -//== hipDoubleComplex* beta_pw_type, -//== hipDoubleComplex* phi, -//== hipDoubleComplex* beta_phi) +//== acc_complex_double_t* beta_pw_type, +//== acc_complex_double_t* phi, +//== acc_complex_double_t* beta_phi) //== { //== int idx_beta = blockDim.x * blockIdx.x + threadIdx.x; //== int idx_phi = blockDim.y * blockIdx.y + threadIdx.y; @@ -142,12 +141,12 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== //== int N = num_blocks(num_gkvec, BLOCK_SIZE); //== -//== hipDoubleComplex val = make_hipDoubleComplex(0.0, 0.0); +//== acc_complex_double_t val = make_accDoubleComplex(0.0, 0.0); //== //== for (int m = 0; m < N; m++) //== { -//== __shared__ hipDoubleComplex beta_pw_tile[BLOCK_SIZE][BLOCK_SIZE]; -//== __shared__ hipDoubleComplex phi_tile[BLOCK_SIZE][BLOCK_SIZE]; +//== __shared__ acc_complex_double_t beta_pw_tile[BLOCK_SIZE][BLOCK_SIZE]; +//== __shared__ acc_complex_double_t phi_tile[BLOCK_SIZE][BLOCK_SIZE]; //== //== int bs = (m + 1) * BLOCK_SIZE > num_gkvec ? 
num_gkvec - m * BLOCK_SIZE : BLOCK_SIZE; //== @@ -163,8 +162,8 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== double sinp = sin(p); //== double cosp = cos(p); //== -//== beta_pw_tile[threadIdx.x][threadIdx.y] = hipCmul(hipConj(beta_pw_type[array2D_offset(igk, offset_t, num_gkvec)]), -//== make_hipDoubleComplex(cosp, sinp)); +//== beta_pw_tile[threadIdx.x][threadIdx.y] = accCmul(accConj(beta_pw_type[array2D_offset(igk, offset_t, num_gkvec)]), +//== make_accDoubleComplex(cosp, sinp)); //== //== } //== @@ -175,7 +174,7 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== //== __syncthreads(); //== -//== for (int i = 0; i < bs; i++) val = hipCadd(val, hipCmul(beta_pw_tile[threadIdx.x][i], phi_tile[threadIdx.y][i])); +//== for (int i = 0; i < bs; i++) val = accCadd(val, accCmul(beta_pw_tile[threadIdx.x][i], phi_tile[threadIdx.y][i])); //== //== __syncthreads(); //== } @@ -198,15 +197,15 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //== dim3 threadsPerBlock(BLOCK_SIZE, BLOCK_SIZE); //== dim3 numBlocks(num_blocks(num_beta, BLOCK_SIZE), num_blocks(num_phi, BLOCK_SIZE)); //== -//== hipLaunchKernelGGL((generate_beta_phi_gpu_kernel), dim3(//== numBlocks), dim3(//== threadsPerBlock), 0, 0, num_gkvec, +//== accLaunchKernel((generate_beta_phi_gpu_kernel), dim3(//== numBlocks), dim3(//== threadsPerBlock), 0, 0, num_gkvec, //== num_beta, //== num_phi, //== beta_t_idx, //== atom_pos, //== gkvec, -//== (hipDoubleComplex*)beta_pw_type, -//== (hipDoubleComplex*)phi, -//== (hipDoubleComplex*)beta_phi); +//== (acc_complex_double_t*)beta_pw_type, +//== (acc_complex_double_t*)phi, +//== (acc_complex_double_t*)beta_phi); //== } @@ -214,33 +213,33 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, //__global__ void copy_beta_psi_gpu_kernel //( -// hipDoubleComplex const* beta_psi, +// acc_complex_double_t const* beta_psi, // int beta_psi_ld, // double const* wo, -// hipDoubleComplex* beta_psi_wo, +// 
acc_complex_double_t* beta_psi_wo, // int beta_psi_wo_ld //) //{ // int xi = threadIdx.x; // int j = blockIdx.x; // -// beta_psi_wo[array2D_offset(xi, j, beta_psi_wo_ld)] = hipCmul(hipConj(beta_psi[array2D_offset(xi, j, beta_psi_ld)]), -// make_hipDoubleComplex(wo[j], 0.0)); +// beta_psi_wo[array2D_offset(xi, j, beta_psi_wo_ld)] = accCmul(accConj(beta_psi[array2D_offset(xi, j, beta_psi_ld)]), +// make_accDoubleComplex(wo[j], 0.0)); //} //extern "C" void copy_beta_psi_gpu(int nbf, // int nloc, -// hipDoubleComplex const* beta_psi, +// acc_complex_double_t const* beta_psi, // int beta_psi_ld, // double const* wo, -// hipDoubleComplex* beta_psi_wo, +// acc_complex_double_t* beta_psi_wo, // int beta_psi_wo_ld, // int stream_id) //{ // dim3 grid_t(nbf); // dim3 grid_b(nloc); // -// hipStream_t stream = (stream_id == -1) ? NULL : streams[stream_id]; +// acc_stream_t stream = (stream_id == -1) ? NULL : streams[stream_id]; // // copy_beta_psi_gpu_kernel <<>> // ( @@ -255,14 +254,14 @@ extern "C" void compute_chebyshev_polynomial_gpu(int num_gkvec, __global__ void compute_inner_product_gpu_kernel ( int num_gkvec_row, - hipDoubleComplex const* f1, - hipDoubleComplex const* f2, + acc_complex_double_t const* f1, + acc_complex_double_t const* f2, double* prod ) { int N = num_blocks(num_gkvec_row, blockDim.x); - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; sdata[threadIdx.x] = 0.0; @@ -290,14 +289,14 @@ __global__ void compute_inner_product_gpu_kernel extern "C" void compute_inner_product_gpu(int num_gkvec_row, int n, - hipDoubleComplex const* f1, - hipDoubleComplex const* f2, + acc_complex_double_t const* f1, + acc_complex_double_t const* f2, double* prod) { dim3 grid_t(64); dim3 grid_b(n); - hipLaunchKernelGGL((compute_inner_product_gpu_kernel), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, + accLaunchKernel((compute_inner_product_gpu_kernel), dim3(grid_b), dim3(grid_t), grid_t.x * 
sizeof(double), 0, num_gkvec_row, f1, f2, @@ -308,14 +307,14 @@ extern "C" void compute_inner_product_gpu(int num_gkvec_row, __global__ void add_checksum_gpu_kernel ( - hipDoubleComplex const* wf__, + acc_complex_double_t const* wf__, int num_rows_loc__, - hipDoubleComplex* result__ + acc_complex_double_t* result__ ) { int N = num_blocks(num_rows_loc__, blockDim.x); - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* sdata_x = (double*)&sdata_ptr[0]; double* sdata_y = (double*)&sdata_ptr[blockDim.x * sizeof(double)]; @@ -340,18 +339,18 @@ __global__ void add_checksum_gpu_kernel __syncthreads(); } - result__[blockIdx.x] = hipCadd(result__[blockIdx.x], make_hipDoubleComplex(sdata_x[0], sdata_y[0])); + result__[blockIdx.x] = accCadd(result__[blockIdx.x], make_accDoubleComplex(sdata_x[0], sdata_y[0])); } -extern "C" void add_checksum_gpu(hipDoubleComplex* wf__, +extern "C" void add_checksum_gpu(acc_complex_double_t* wf__, int num_rows_loc__, int nwf__, - hipDoubleComplex* result__) + acc_complex_double_t* result__) { dim3 grid_t(64); dim3 grid_b(nwf__); - hipLaunchKernelGGL((add_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, + accLaunchKernel((add_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, wf__, num_rows_loc__, result__ diff --git a/src/Kernels/density_rg.cu b/src/Kernels/density_rg.cu index 0cd208551..fb893b7f8 100644 --- a/src/Kernels/density_rg.cu +++ b/src/Kernels/density_rg.cu @@ -23,24 +23,23 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void update_density_rg_1_gpu_kernel(int size__, - hipDoubleComplex const* psi_rg__, + acc_complex_double_t const* psi_rg__, double wt__, double* density_rg__) { int ir = blockIdx.x * blockDim.x + threadIdx.x; if (ir < size__) { - hipDoubleComplex z = psi_rg__[ir]; + acc_complex_double_t z = psi_rg__[ir]; 
density_rg__[ir] += (z.x * z.x + z.y * z.y) * wt__; } } extern "C" void update_density_rg_1_gpu(int size__, - hipDoubleComplex const* psi_rg__, + acc_complex_double_t const* psi_rg__, double wt__, double* density_rg__) { @@ -49,7 +48,7 @@ extern "C" void update_density_rg_1_gpu(int size__, dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); - hipLaunchKernelGGL((update_density_rg_1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((update_density_rg_1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, psi_rg__, wt__, @@ -58,23 +57,23 @@ extern "C" void update_density_rg_1_gpu(int size__, } __global__ void update_density_rg_2_gpu_kernel(int size__, - hipDoubleComplex const* psi_up_rg__, - hipDoubleComplex const* psi_dn_rg__, + acc_complex_double_t const* psi_up_rg__, + acc_complex_double_t const* psi_dn_rg__, double wt__, double* density_x_rg__, double* density_y_rg__) { int ir = blockIdx.x * blockDim.x + threadIdx.x; if (ir < size__) { - hipDoubleComplex z = hipCmul(psi_up_rg__[ir], hipConj(psi_dn_rg__[ir])); + acc_complex_double_t z = accCmul(psi_up_rg__[ir], accConj(psi_dn_rg__[ir])); density_x_rg__[ir] += 2 * z.x * wt__; density_y_rg__[ir] -= 2 * z.y * wt__; } } extern "C" void update_density_rg_2_gpu(int size__, - hipDoubleComplex const* psi_up_rg__, - hipDoubleComplex const* psi_dn_rg__, + acc_complex_double_t const* psi_up_rg__, + acc_complex_double_t const* psi_dn_rg__, double wt__, double* density_x_rg__, double* density_y_rg__) @@ -84,7 +83,7 @@ extern "C" void update_density_rg_2_gpu(int size__, dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); - hipLaunchKernelGGL((update_density_rg_2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((update_density_rg_2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, psi_up_rg__, psi_dn_rg__, diff --git a/src/Kernels/generate_dm_pw.cu b/src/Kernels/generate_dm_pw.cu index 948569637..b1f39d866 100644 --- a/src/Kernels/generate_dm_pw.cu +++ 
b/src/Kernels/generate_dm_pw.cu @@ -23,13 +23,8 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "../SDDK/GPU/acc.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" - -#ifdef __GPU +#include "../SDDK/GPU/acc_runtime.hpp" #include "../SDDK/GPU/gpublas_interface.hpp" -#endif __global__ void generate_phase_factors_conj_gpu_kernel ( @@ -37,7 +32,7 @@ __global__ void generate_phase_factors_conj_gpu_kernel int num_atoms__, double const* atom_pos__, int const* gvec__, - hipDoubleComplex* phase_factors__ + acc_complex_double_t* phase_factors__ ) { int ia = blockIdx.y; @@ -53,7 +48,7 @@ __global__ void generate_phase_factors_conj_gpu_kernel int gvz = gvec__[array2D_offset(igloc, 2, num_gvec_loc__)]; double p = twopi * (ax * gvx + ay * gvy + az * gvz); - phase_factors__[array2D_offset(igloc, ia, num_gvec_loc__)] = make_hipDoubleComplex(cos(p), -sin(p)); + phase_factors__[array2D_offset(igloc, ia, num_gvec_loc__)] = make_accDoubleComplex(cos(p), -sin(p)); } } @@ -69,17 +64,17 @@ extern "C" void generate_dm_pw_gpu(int num_atoms__, { //CUDA_timer t("generate_dm_pw_gpu"); - hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t)acc::stream(stream_id(stream_id__)); dim3 grid_t(32); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - hipLaunchKernelGGL((generate_phase_factors_conj_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((generate_phase_factors_conj_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, num_gvec_loc__, num_atoms__, atom_pos__, gvec__, - (hipDoubleComplex*)phase_factors__ + (acc_complex_double_t*)phase_factors__ ); double alpha = 1; diff --git a/src/Kernels/generate_phase_factors.cu b/src/Kernels/generate_phase_factors.cu index d6fb737ba..7d14e1194 100644 --- a/src/Kernels/generate_phase_factors.cu +++ b/src/Kernels/generate_phase_factors.cu @@ -23,8 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include 
"hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void generate_phase_factors_gpu_kernel ( @@ -32,7 +31,7 @@ __global__ void generate_phase_factors_gpu_kernel int num_atoms, double const* atom_pos, int const* gvec, - hipDoubleComplex* phase_factors + acc_complex_double_t* phase_factors ) { int ia = blockIdx.y; @@ -52,7 +51,7 @@ __global__ void generate_phase_factors_gpu_kernel double sinp = sin(p); double cosp = cos(p); - phase_factors[array2D_offset(igloc, ia, num_gvec_loc)] = make_hipDoubleComplex(cosp, sinp); + phase_factors[array2D_offset(igloc, ia, num_gvec_loc)] = make_accDoubleComplex(cosp, sinp); } } @@ -61,13 +60,13 @@ extern "C" void generate_phase_factors_gpu(int num_gvec_loc__, int num_atoms__, int const* gvec__, double const* atom_pos__, - hipDoubleComplex* phase_factors__) + acc_complex_double_t* phase_factors__) { dim3 grid_t(32); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - hipLaunchKernelGGL((generate_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((generate_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_gvec_loc__, num_atoms__, atom_pos__, diff --git a/src/Kernels/mul_by_veff.cu b/src/Kernels/mul_by_veff.cu index 9dc0efc77..146600a72 100644 --- a/src/Kernels/mul_by_veff.cu +++ b/src/Kernels/mul_by_veff.cu @@ -23,79 +23,77 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "../SDDK/GPU/acc.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void mul_by_veff0_gpu_kernel(int size__, double* const* veff__, - hipDoubleComplex* buf__) + acc_complex_double_t* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - hipDoubleComplex z = buf__[i]; + acc_complex_double_t z = buf__[i]; double v0 = veff__[0][i]; - buf__[i] = make_hipDoubleComplex(z.x * v0, z.y * v0); + buf__[i] = make_accDoubleComplex(z.x * v0, z.y * v0); } } __global__ void mul_by_veff1_gpu_kernel(int size__, double* 
const* veff__, - hipDoubleComplex* buf__) + acc_complex_double_t* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - hipDoubleComplex z = buf__[i]; + acc_complex_double_t z = buf__[i]; double v1 = veff__[1][i]; - buf__[i] = make_hipDoubleComplex(z.x * v1, z.y * v1); + buf__[i] = make_accDoubleComplex(z.x * v1, z.y * v1); } } __global__ void mul_by_veff2_gpu_kernel(int size__, double* const* veff__, - hipDoubleComplex* buf__) + acc_complex_double_t* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - hipDoubleComplex z = buf__[i]; - hipDoubleComplex v = make_hipDoubleComplex(veff__[2][i], -veff__[3][i]); - buf__[i] = hipCmul(z, v); + acc_complex_double_t z = buf__[i]; + acc_complex_double_t v = make_accDoubleComplex(veff__[2][i], -veff__[3][i]); + buf__[i] = accCmul(z, v); } } __global__ void mul_by_veff3_gpu_kernel(int size__, double* const* veff__, - hipDoubleComplex* buf__) + acc_complex_double_t* buf__) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < size__) { - hipDoubleComplex z = buf__[i]; - hipDoubleComplex v = make_hipDoubleComplex(veff__[2][i], veff__[3][i]); - buf__[i] = hipCmul(z, v); + acc_complex_double_t z = buf__[i]; + acc_complex_double_t v = make_accDoubleComplex(veff__[2][i], veff__[3][i]); + buf__[i] = accCmul(z, v); } } -extern "C" void mul_by_veff_gpu(int ispn__, int size__, double* const* veff__, hipDoubleComplex* buf__) +extern "C" void mul_by_veff_gpu(int ispn__, int size__, double* const* veff__, acc_complex_double_t* buf__) { dim3 grid_t(64); dim3 grid_b(num_blocks(size__, grid_t.x)); switch (ispn__) { case 0: { - hipLaunchKernelGGL((mul_by_veff0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); + accLaunchKernel((mul_by_veff0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 1: { - hipLaunchKernelGGL((mul_by_veff1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); + 
accLaunchKernel((mul_by_veff1_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 2: { - hipLaunchKernelGGL((mul_by_veff2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); + accLaunchKernel((mul_by_veff2_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } case 3: { - hipLaunchKernelGGL((mul_by_veff3_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); + accLaunchKernel((mul_by_veff3_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, size__, veff__, buf__); break; } } diff --git a/src/Kernels/mul_veff_with_phase_factors.cu b/src/Kernels/mul_veff_with_phase_factors.cu index c306df47c..db6f493ff 100644 --- a/src/Kernels/mul_veff_with_phase_factors.cu +++ b/src/Kernels/mul_veff_with_phase_factors.cu @@ -23,16 +23,14 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "../SDDK/GPU/acc.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void mul_veff_with_phase_factors_gpu_kernel(int num_gvec_loc__, - hipDoubleComplex const* veff__, + acc_complex_double_t const* veff__, int const* gvec__, int num_atoms__, double const* atom_pos__, - hipDoubleComplex* veff_a__) + acc_complex_double_t* veff_a__) { int ia = blockIdx.y; double ax = atom_pos__[array2D_offset(ia, 0, num_atoms__)]; @@ -48,14 +46,15 @@ __global__ void mul_veff_with_phase_factors_gpu_kernel(int num_gvec_loc__, double p = twopi * (ax * gvx + ay * gvy + az * gvz); - //veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = hipConj(hipCmul(veff__[igloc], make_hipDoubleComplex(cos(p), sin(p)))); - veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = hipCmul(veff__[igloc], make_hipDoubleComplex(cos(p), sin(p))); + //veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = accConj(accCmul(veff__[igloc], make_accDoubleComplex(cos(p), sin(p)))); + veff_a__[array2D_offset(igloc, ia, num_gvec_loc__)] = + accCmul(veff__[igloc], make_accDoubleComplex(cos(p), sin(p))); 
} } extern "C" void mul_veff_with_phase_factors_gpu(int num_atoms__, int num_gvec_loc__, - hipDoubleComplex const* veff__, + acc_complex_double_t const* veff__, int const* gvec__, double const* atom_pos__, double* veff_a__, @@ -64,14 +63,14 @@ extern "C" void mul_veff_with_phase_factors_gpu(int num_atoms__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__); - hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t)acc::stream(stream_id(stream_id__)); - hipLaunchKernelGGL((mul_veff_with_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((mul_veff_with_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, num_gvec_loc__, veff__, gvec__, num_atoms__, atom_pos__, - (hipDoubleComplex*)veff_a__ + (acc_complex_double_t*)veff_a__ ); } diff --git a/src/Kernels/random.cu b/src/Kernels/random.cu index ea9fe99b1..e45ec3efd 100644 --- a/src/Kernels/random.cu +++ b/src/Kernels/random.cu @@ -21,8 +21,8 @@ * * \brief CUDA kernel to compute simple random noise on GPU. 
*/ -//#include "hip/hip_runtime.h" +//#include "../SDDK/GPU/acc_runtime.hpp" //== inline __device__ uint32_t random(size_t seed) //== { //== uint32_t h = 5381; diff --git a/src/Kernels/residuals_aux.cu b/src/Kernels/residuals_aux.cu index 797e7b0b5..71d9d2776 100644 --- a/src/Kernels/residuals_aux.cu +++ b/src/Kernels/residuals_aux.cu @@ -23,16 +23,15 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void compute_residuals_gpu_kernel ( int const num_rows_loc__, double const* eval__, - hipDoubleComplex const* hpsi__, - hipDoubleComplex const* opsi__, - hipDoubleComplex* res__ + acc_complex_double_t const* hpsi__, + acc_complex_double_t const* opsi__, + acc_complex_double_t* res__ ) { int j = blockIdx.x * blockDim.x + threadIdx.x; @@ -41,7 +40,7 @@ __global__ void compute_residuals_gpu_kernel if (j < num_rows_loc__) { int k = array2D_offset(j, ibnd, num_rows_loc__); /* res = hpsi_j - e_j * opsi_j */ - res__[k] = hipCsub(hpsi__[k], make_hipDoubleComplex(opsi__[k].x * eval__[ibnd], opsi__[k].y * eval__[ibnd])); + res__[k] = accCsub(hpsi__[k], make_accDoubleComplex(opsi__[k].x * eval__[ibnd], opsi__[k].y * eval__[ibnd])); } } @@ -49,7 +48,7 @@ __global__ void compute_residuals_gpu_kernel //== ( //== int num_gkvec_row, //== int* res_idx, -//== hipDoubleComplex const* res, +//== acc_complex_double_t const* res, //== double* res_norm, //== int reduced, //== int mpi_rank @@ -57,7 +56,7 @@ __global__ void compute_residuals_gpu_kernel //== { //== int N = num_blocks(num_gkvec_row, blockDim.x); //== -//== HIP_DYNAMIC_SHARED( char, sdata_ptr) +//== ACC_DYNAMIC_SHARED( char, sdata_ptr) //== double* sdata = (double*)&sdata_ptr[0]; //== //== sdata[threadIdx.x] = 0.0; @@ -101,11 +100,11 @@ __global__ void compute_residuals_gpu_kernel //== int num_res_local__, //== int* res_idx__, //== double* eval__, -//== hipDoubleComplex const* hpsi__, -//== hipDoubleComplex const* opsi__, 
+//== acc_complex_double_t const* hpsi__, +//== acc_complex_double_t const* opsi__, //== double const* h_diag__, //== double const* o_diag__, -//== hipDoubleComplex* res__, +//== acc_complex_double_t* res__, //== double* res_norm__, //== double* p_norm__, //== int gkvec_reduced__, @@ -160,9 +159,9 @@ __global__ void compute_residuals_gpu_kernel //== ); //== } -extern "C" void compute_residuals_gpu(hipDoubleComplex* hpsi__, - hipDoubleComplex* opsi__, - hipDoubleComplex* res__, +extern "C" void compute_residuals_gpu(acc_complex_double_t* hpsi__, + acc_complex_double_t* opsi__, + acc_complex_double_t* res__, int num_rows_loc__, int num_bands__, double* eval__) @@ -170,7 +169,7 @@ extern "C" void compute_residuals_gpu(hipDoubleComplex* hpsi__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_rows_loc__, grid_t.x), num_bands__); - hipLaunchKernelGGL((compute_residuals_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((compute_residuals_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_rows_loc__, eval__, hpsi__, @@ -182,7 +181,7 @@ extern "C" void compute_residuals_gpu(hipDoubleComplex* hpsi__, __global__ void add_square_sum_gpu_kernel ( int num_rows_loc__, - hipDoubleComplex const* wf__, + acc_complex_double_t const* wf__, int reduced__, int mpi_rank__, double* result__ @@ -190,7 +189,7 @@ __global__ void add_square_sum_gpu_kernel { int N = num_blocks(num_rows_loc__, blockDim.x); - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; sdata[threadIdx.x] = 0.0; @@ -226,7 +225,7 @@ __global__ void add_square_sum_gpu_kernel } } -extern "C" void add_square_sum_gpu(hipDoubleComplex* wf__, +extern "C" void add_square_sum_gpu(acc_complex_double_t* wf__, int num_rows_loc__, int nwf__, int reduced__, @@ -236,7 +235,7 @@ extern "C" void add_square_sum_gpu(hipDoubleComplex* wf__, dim3 grid_t(64); dim3 grid_b(nwf__); - hipLaunchKernelGGL((add_square_sum_gpu_kernel), dim3(grid_b), dim3(grid_t), grid_t.x * 
sizeof(double), 0, + accLaunchKernel((add_square_sum_gpu_kernel), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, num_rows_loc__, wf__, reduced__, @@ -249,7 +248,7 @@ __global__ void apply_preconditioner_gpu_kernel(int const num_rows_loc__, double const* eval__, double const* h_diag__, double const* o_diag__, - hipDoubleComplex* res__) + acc_complex_double_t* res__) { int j = blockIdx.x * blockDim.x + threadIdx.x; int ibnd = blockIdx.y; @@ -258,11 +257,11 @@ __global__ void apply_preconditioner_gpu_kernel(int const num_rows_loc__, double p = (h_diag__[j] - eval__[ibnd] * o_diag__[j]); p = 0.5 * (1 + p + sqrt(1.0 + (p - 1) * (p - 1))); int k = array2D_offset(j, ibnd, num_rows_loc__); - res__[k] = make_hipDoubleComplex(res__[k].x / p, res__[k].y / p); + res__[k] = make_accDoubleComplex(res__[k].x / p, res__[k].y / p); } } -extern "C" void apply_preconditioner_gpu(hipDoubleComplex* res__, +extern "C" void apply_preconditioner_gpu(acc_complex_double_t* res__, int num_rows_loc__, int num_bands__, double* eval__, @@ -272,26 +271,26 @@ extern "C" void apply_preconditioner_gpu(hipDoubleComplex* res__, dim3 grid_t(64); dim3 grid_b(num_blocks(num_rows_loc__, grid_t.x), num_bands__); - hipLaunchKernelGGL((apply_preconditioner_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_rows_loc__, eval__, h_diag__, o_diag__, res__); + accLaunchKernel((apply_preconditioner_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, num_rows_loc__, eval__, h_diag__, o_diag__, res__); } -__global__ void make_real_g0_gpu_kernel(hipDoubleComplex* res__, +__global__ void make_real_g0_gpu_kernel(acc_complex_double_t* res__, int ld__) { - hipDoubleComplex z = res__[array2D_offset(0, blockIdx.x, ld__)]; + acc_complex_double_t z = res__[array2D_offset(0, blockIdx.x, ld__)]; if (threadIdx.x == 0) { - res__[array2D_offset(0, blockIdx.x, ld__)] = make_hipDoubleComplex(z.x, 0); + res__[array2D_offset(0, blockIdx.x, ld__)] = make_accDoubleComplex(z.x, 0); } } -extern "C" void 
make_real_g0_gpu(hipDoubleComplex* res__, +extern "C" void make_real_g0_gpu(acc_complex_double_t* res__, int ld__, int n__) { dim3 grid_t(32); dim3 grid_b(n__); - hipLaunchKernelGGL((make_real_g0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, res__, ld__); + accLaunchKernel((make_real_g0_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, res__, ld__); } diff --git a/src/Kernels/spline.cu b/src/Kernels/spline.cu index 4d4173491..1a45dbacb 100644 --- a/src/Kernels/spline.cu +++ b/src/Kernels/spline.cu @@ -23,8 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" __global__ void spline_inner_product_gpu_kernel_v3(int num_points__, int const* idx_ri__, @@ -38,7 +37,7 @@ __global__ void spline_inner_product_gpu_kernel_v3(int num_points__, int idx_f = idx_ri__[array2D_offset(0, blockIdx.x, 2)]; int idx_g = idx_ri__[array2D_offset(1, blockIdx.x, 2)]; - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* sdata = (double*)&sdata_ptr[0]; int a_offs_f = array3D_offset(0, 0, idx_f, num_points__, 4); @@ -139,7 +138,7 @@ extern "C" void spline_inner_product_gpu_v3(int const* idx_ri__, dim3 grid_t(64); dim3 grid_b(num_ri__); - hipLaunchKernelGGL((spline_inner_product_gpu_kernel_v3), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, + accLaunchKernel((spline_inner_product_gpu_kernel_v3), dim3(grid_b), dim3(grid_t), grid_t.x * sizeof(double), 0, num_points__, idx_ri__, x__, diff --git a/src/Kernels/sum_q_pw_dm_pw.cu b/src/Kernels/sum_q_pw_dm_pw.cu index d588b4bcb..807478cc8 100644 --- a/src/Kernels/sum_q_pw_dm_pw.cu +++ b/src/Kernels/sum_q_pw_dm_pw.cu @@ -23,9 +23,7 @@ */ #include "../SDDK/GPU/cuda_common.hpp" -#include "../SDDK/GPU/acc.hpp" -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "../SDDK/GPU/acc_runtime.hpp" #ifdef __CUDA #include "../SDDK/GPU/cuda_timer.hpp" @@ -37,10 +35,10 @@ __global__ void 
sum_q_pw_dm_pw_gpu_kernel double const* q_pw__, double const* dm_pw__, double const* sym_weight__, - hipDoubleComplex* rho_pw__ + acc_complex_double_t* rho_pw__ ) { - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* rho_re = (double*)&sdata_ptr[0]; double* rho_im = (double*)&sdata_ptr[sizeof(double) * blockDim.x]; @@ -75,7 +73,7 @@ __global__ void sum_q_pw_dm_pw_gpu_kernel __syncthreads(); } if (threadIdx.x == 0) { - rho_pw__[igloc] = hipCadd(rho_pw__[igloc], make_hipDoubleComplex(rho_re[0], rho_im[0])); + rho_pw__[igloc] = accCadd(rho_pw__[igloc], make_accDoubleComplex(rho_re[0], rho_im[0])); } } @@ -84,19 +82,19 @@ extern "C" void sum_q_pw_dm_pw_gpu(int num_gvec_loc__, double const* q_pw__, double const* dm_pw__, double const* sym_weight__, - hipDoubleComplex* rho_pw__, + acc_complex_double_t* rho_pw__, int stream_id__) { #ifdef __CUDA CUDA_timer t("sum_q_pw_dm_pw_gpu"); #endif - hipStream_t stream = (hipStream_t)acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t)acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_gvec_loc__); - hipLaunchKernelGGL((sum_q_pw_dm_pw_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), stream, + accLaunchKernel((sum_q_pw_dm_pw_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), stream, nbf__, q_pw__, dm_pw__, diff --git a/src/Potential/xc_functional_base.hpp b/src/Potential/xc_functional_base.hpp index 27a958c89..8ab52a88d 100644 --- a/src/Potential/xc_functional_base.hpp +++ b/src/Potential/xc_functional_base.hpp @@ -51,12 +51,12 @@ const std::map libxc_functionals = { {"XC_LDA_C_1D_CSC", XC_LDA_C_1D_CSC}, /* Casula, Sorella, and Senatore 1D correlation */ {"XC_LDA_X_2D", XC_LDA_X_2D}, /* Exchange in 2D */ {"XC_LDA_XC_TETER93", XC_LDA_XC_TETER93}, /* Teter 93 parametrization */ -#if (XC_MAJOR_VERSION >= 4) && (XC_MINOR_VERSION >= 3) - {"XC_LDA_X_1D_SOFT", XC_LDA_X_1D_SOFT}, - {"XC_LDA_X_1D_EXPONENTIAL", 
XC_LDA_X_1D_EXPONENTIAL}, -#else - {"XC_LDA_X_1D", XC_LDA_X_1D}, /* Exchange in 1D */ -#endif +// #if (XC_MAJOR_VERSION >= 4) && (XC_MINOR_VERSION >= 3) +// {"XC_LDA_X_1D_SOFT", XC_LDA_X_1D_SOFT}, +// {"XC_LDA_X_1D_EXPONENTIAL", XC_LDA_X_1D_EXPONENTIAL}, +// #else +// {"XC_LDA_X_1D", XC_LDA_X_1D}, [> Exchange in 1D <] +// #endif {"XC_LDA_C_ML1", XC_LDA_C_ML1}, /* Modified LSD (version 1) of Proynov and Salahub */ {"XC_LDA_C_ML2", XC_LDA_C_ML2}, /* Modified LSD (version 2) of Proynov and Salahub */ {"XC_LDA_C_GOMBAS", XC_LDA_C_GOMBAS}, /* Gombas parametrization */ diff --git a/src/SDDK/GPU/acc.hpp b/src/SDDK/GPU/acc.hpp index 891bca52b..9a378737e 100644 --- a/src/SDDK/GPU/acc.hpp +++ b/src/SDDK/GPU/acc.hpp @@ -31,10 +31,12 @@ #include #include #include +#include #endif #if defined(__ROCM) #include +#include #endif #include @@ -47,9 +49,9 @@ #include #if defined(__CUDA) -#define P(x) cuda##x +#define GPU_PREFIX(x) cuda##x #elif defined(__ROCM) -#define P(x) hip##x +#define GPU_PREFIX(x) hip##x #endif #if defined(__CUDA) @@ -68,6 +70,32 @@ using acc_error_t = hipError_t; using acc_error_t = void; #endif +#if defined(__CUDA) +using acc_complex_float_t = cuFloatComplex; +using acc_complex_double_t = cuDoubleComplex; +#define make_accDoubleComplex make_cuDoubleComplex +#define make_accFloatComplex make_cuFloatComplex +#define accCadd cuCadd +#define accCsub cuCsub +#define accCmul cuCmul +#define accCdiv cuCdiv +#define accConj cuConj +#define ACC_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; + +#elif defined(__ROCM) +using acc_error_t = hipError_t; +using acc_complex_float_t = hipFloatComplex; +using acc_complex_double_t = hipDoubleComplex; +#define make_accDoubleComplex make_hipDoubleComplex +#define make_accFloatComplex make_hipFloatComplex +#define accCadd hipCadd +#define accCsub hipCsub +#define accCmul hipCmul +#define accCdiv hipCdiv +#define accConj hipConj +#define ACC_DYNAMIC_SHARED(type, var) HIP_DYNAMIC_SHARED(type, var) +#endif + /// Helper class 
to wrap stream id (integer number). class stream_id { @@ -133,12 +161,12 @@ inline void stack_backtrace() #define CALL_DEVICE_API(func__, args__) \ { \ acc_error_t error; \ - error = P(func__) args__; \ - if (error != P(Success)) { \ + error = GPU_PREFIX(func__) args__; \ + if (error != GPU_PREFIX(Success)) { \ char nm[1024]; \ gethostname(nm, 1024); \ printf("hostname: %s\n", nm); \ - printf("Error in %s at line %i of file %s: %s\n", #func__, __LINE__, __FILE__, P(GetErrorString)(error)); \ + printf("Error in %s at line %i of file %s: %s\n", #func__, __LINE__, __FILE__, GPU_PREFIX(GetErrorString)(error)); \ stack_backtrace(); \ } \ } @@ -235,7 +263,7 @@ inline int num_devices() { int count{0}; #if defined(__CUDA) || defined(__ROCM) - if (P(GetDeviceCount)(&count) != P(Success)) { + if (GPU_PREFIX(GetDeviceCount)(&count) != GPU_PREFIX(Success)) { return 0; } #endif @@ -297,7 +325,7 @@ inline void copy(T* target__, T const* source__, size_t n__) { assert(source__ != nullptr); assert(target__ != nullptr); - CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), P(MemcpyDeviceToDevice))); + CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), GPU_PREFIX(MemcpyDeviceToDevice))); } /// 2D copy inside a device. @@ -305,21 +333,21 @@ template inline void copy(T* target__, int ld1__, T const* source__, int ld2__, int nrow__, int ncol__) { CALL_DEVICE_API(Memcpy2D, (target__, ld1__ * sizeof(T), source__, ld2__ * sizeof(T), nrow__ * sizeof(T), ncol__, - P(MemcpyDeviceToDevice))); + GPU_PREFIX(MemcpyDeviceToDevice))); } /// Copy memory from host to device. template inline void copyin(T* target__, T const* source__, size_t n__) { - CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), P(MemcpyHostToDevice))); + CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), GPU_PREFIX(MemcpyHostToDevice))); } /// Asynchronous copy from host to device. 
template inline void copyin(T* target__, T const* source__, size_t n__, stream_id sid__) { - CALL_DEVICE_API(MemcpyAsync, (target__, source__, n__ * sizeof(T), P(MemcpyHostToDevice), stream(sid__))); + CALL_DEVICE_API(MemcpyAsync, (target__, source__, n__ * sizeof(T), GPU_PREFIX(MemcpyHostToDevice), stream(sid__))); } /// 2D copy to the device. @@ -327,7 +355,7 @@ template inline void copyin(T* target__, int ld1__, T const* source__, int ld2__, int nrow__, int ncol__) { CALL_DEVICE_API(Memcpy2D, (target__, ld1__ * sizeof(T), source__, ld2__ * sizeof(T), nrow__ * sizeof(T), ncol__, - P(MemcpyHostToDevice))); + GPU_PREFIX(MemcpyHostToDevice))); } /// Asynchronous 2D copy to the device. @@ -335,21 +363,21 @@ template inline void copyin(T* target__, int ld1__, T const* source__, int ld2__, int nrow__, int ncol__, stream_id sid__) { CALL_DEVICE_API(Memcpy2DAsync, (target__, ld1__ * sizeof(T), source__, ld2__ * sizeof(T), nrow__ * sizeof(T), ncol__, - P(MemcpyHostToDevice), stream(sid__))); + GPU_PREFIX(MemcpyHostToDevice), stream(sid__))); } /// Copy memory from device to host. template inline void copyout(T* target__, T const* source__, size_t n__) { - CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), P(MemcpyDeviceToHost))); + CALL_DEVICE_API(Memcpy, (target__, source__, n__ * sizeof(T), GPU_PREFIX(MemcpyDeviceToHost))); } /// Asynchronous copy from device to host. template inline void copyout(T* target__, T const* source__, size_t n__, stream_id sid__) { - CALL_DEVICE_API(MemcpyAsync, (target__, source__, n__ * sizeof(T), P(MemcpyDeviceToHost), stream(sid__))); + CALL_DEVICE_API(MemcpyAsync, (target__, source__, n__ * sizeof(T), GPU_PREFIX(MemcpyDeviceToHost), stream(sid__))); } /// 2D copy from device to host. 
@@ -357,7 +385,7 @@ template inline void copyout(T* target__, int ld1__, T const* source__, int ld2__, int nrow__, int ncol__) { CALL_DEVICE_API(Memcpy2D, (target__, ld1__ * sizeof(T), source__, ld2__ * sizeof(T), nrow__ * sizeof(T), ncol__, - P(MemcpyDeviceToHost))); + GPU_PREFIX(MemcpyDeviceToHost))); } /// Asynchronous 2D copy from device to host. @@ -365,7 +393,7 @@ template inline void copyout(T* target__, int ld1__, T const* source__, int ld2__, int nrow__, int ncol__, stream_id sid__) { CALL_DEVICE_API(Memcpy2D, (target__, ld1__ * sizeof(T), source__, ld2__ * sizeof(T), nrow__ * sizeof(T), ncol__, - P(MemcpyDeviceToHost), stream(sid__))); + GPU_PREFIX(MemcpyDeviceToHost), stream(sid__))); } /// Zero the device memory. @@ -375,6 +403,12 @@ inline void zero(T* ptr__, size_t n__) CALL_DEVICE_API(Memset, (ptr__, 0, n__ * sizeof(T))); } +template +inline void zero(T* ptr__, size_t n__, stream_id sid__) +{ + CALL_DEVICE_API(MemsetAsync, (ptr__, 0, n__ * sizeof(T), stream(sid__))); +} + /// Zero the 2D block of device memory. 
template inline void zero(T* ptr__, int ld__, int nrow__, int ncol__) @@ -472,11 +506,11 @@ inline bool check_device_ptr(void const* ptr__) } // namespace acc #if defined(__GPU) -extern "C" void scale_matrix_columns_gpu(int nrow, int ncol, void* mtrx, double* a); +extern "C" void scale_matrix_columns_gpu(int nrow, int ncol, acc_complex_double_t* mtrx, double* a); -extern "C" void scale_matrix_rows_gpu(int nrow, int ncol, void* mtrx, double const* v); +extern "C" void scale_matrix_rows_gpu(int nrow, int ncol, acc_complex_double_t* mtrx, double const* v); -extern "C" void scale_matrix_elements_gpu(std::complex* ptr__, +extern "C" void scale_matrix_elements_gpu(acc_complex_double_t* ptr__, int ld__, int nrow__, int ncol__, diff --git a/src/SDDK/GPU/acc_runtime.hpp b/src/SDDK/GPU/acc_runtime.hpp new file mode 100644 index 000000000..d89048672 --- /dev/null +++ b/src/SDDK/GPU/acc_runtime.hpp @@ -0,0 +1,75 @@ +// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file acc_runtime.hpp + * + * \brief Interface to accelerators API. + * + */ +#ifndef __ACC_RUNTIME_HPP__ +#define __ACC_RUNTIME_HPP__ + +#include "acc.hpp" + +#if defined(__CUDA) +#include +#endif + +#if defined(__ROCM) +#include +#endif + +/* + * CUDA runtime calls and definitions + */ +#ifdef __CUDA +#define accLaunchKernel(kernelName, numblocks, numthreads, memperblock, streamId, ...)
\ + do { \ + hipLaunchKernelGGL(__VA_ARGS__); \ + } while (0) + +#endif + +#endif diff --git a/src/SDDK/GPU/checksum.cu b/src/SDDK/GPU/checksum.cu index 76b34cc67..20fbafac2 100644 --- a/src/SDDK/GPU/checksum.cu +++ b/src/SDDK/GPU/checksum.cu @@ -23,19 +23,18 @@ */ #include "cuda_common.hpp" -#include -#include +#include "acc_runtime.hpp" __global__ void double_complex_checksum_gpu_kernel ( - hipDoubleComplex const* ptr__, + acc_complex_double_t const* ptr__, size_t size__, - hipDoubleComplex *result__ + acc_complex_double_t *result__ ) { int N = num_blocks(size__, blockDim.x); - HIP_DYNAMIC_SHARED( char, sdata_ptr) + ACC_DYNAMIC_SHARED( char, sdata_ptr) double* sdata_x = (double*)&sdata_ptr[0]; double* sdata_y = (double*)&sdata_ptr[blockDim.x * sizeof(double)]; @@ -59,26 +58,26 @@ __global__ void double_complex_checksum_gpu_kernel __syncthreads(); } - *result__ = make_hipDoubleComplex(sdata_x[0], sdata_y[0]); + *result__ = make_accDoubleComplex(sdata_x[0], sdata_y[0]); } -extern "C" void double_complex_checksum_gpu(hipDoubleComplex const* ptr__, +extern "C" void double_complex_checksum_gpu(acc_complex_double_t const* ptr__, size_t size__, - hipDoubleComplex* result__) + acc_complex_double_t* result__) { dim3 grid_t(64); dim3 grid_b(1); - hipDoubleComplex* res; - hipMalloc(&res, sizeof(hipDoubleComplex)); + acc_complex_double_t* res; + res = acc::allocate(1); - hipLaunchKernelGGL((double_complex_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, + accLaunchKernel((double_complex_checksum_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0, ptr__, size__, res ); - hipMemcpy(result__, res, sizeof(hipDoubleComplex), hipMemcpyDeviceToHost); + acc::copyout(result__, res, 1); - hipFree(res); + acc::deallocate(res); } diff --git a/src/SDDK/GPU/cuda_common.hpp b/src/SDDK/GPU/cuda_common.hpp index 3dcf1f60b..cc6df0c5c 100644 --- a/src/SDDK/GPU/cuda_common.hpp +++ b/src/SDDK/GPU/cuda_common.hpp @@ -26,8 +26,7 @@ #define 
__CUDA_COMMON_HPP__ #include -#include "hip/hip_runtime.h" -#include "hip/hip_complex.h" +#include "acc.hpp" const double twopi = 6.2831853071795864769; diff --git a/src/SDDK/GPU/fft_kernels.cu b/src/SDDK/GPU/fft_kernels.cu index 3a32af584..d9d273c06 100644 --- a/src/SDDK/GPU/fft_kernels.cu +++ b/src/SDDK/GPU/fft_kernels.cu @@ -22,11 +22,9 @@ * \brief Contains implementaiton of CUDA and ROCM kernels necessary for a FFT driver. */ -#include "acc.hpp" #include -#include -#include #include "cuda_common.hpp" +#include "acc_runtime.hpp" //NOTE: HIP will call the corresponding CUDA function if compiled with CUDA support @@ -36,8 +34,8 @@ __global__ void repack_z_buffer_gpu_kernel(int size_z, int num_zcol_loc, int const* local_z_offsets, int const* local_z_sizes, - hipDoubleComplex* z_sticks_local, - hipDoubleComplex* a2a_buffer) + acc_complex_double_t* z_sticks_local, + acc_complex_double_t* a2a_buffer) { int iz = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; int izcol = hipBlockIdx_y; @@ -62,14 +60,14 @@ extern "C" void repack_z_buffer_gpu(int direction, int zcol_max_size, int const* local_z_offsets, int const* local_z_sizes, - hipDoubleComplex* z_sticks_local, - hipDoubleComplex* a2a_buffer) + acc_complex_double_t* z_sticks_local, + acc_complex_double_t* a2a_buffer) { dim3 grid_t(64); dim3 grid_b(num_blocks(zcol_max_size, grid_t.x), num_zcol_loc, num_ranks); if (direction == 1) { - hipLaunchKernelGGL((repack_z_buffer_gpu_kernel<1>), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((repack_z_buffer_gpu_kernel<1>), dim3(grid_b), dim3(grid_t), 0, 0, size_z, num_zcol_loc, local_z_offsets, @@ -78,7 +76,7 @@ extern "C" void repack_z_buffer_gpu(int direction, a2a_buffer ); } else { - hipLaunchKernelGGL((repack_z_buffer_gpu_kernel<-1>), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((repack_z_buffer_gpu_kernel<-1>), dim3(grid_b), dim3(grid_t), 0, 0, size_z, num_zcol_loc, local_z_offsets, @@ -94,8 +92,8 @@ extern "C" void repack_z_buffer_gpu(int direction, __global__ 
void batch_load_gpu_kernel(int fft_size, int num_pw_components, int const* map, - hipDoubleComplex const* data, - hipDoubleComplex* fft_buffer) + acc_complex_double_t const* data, + acc_complex_double_t* fft_buffer) { int i = hipBlockIdx_y; int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -109,18 +107,18 @@ extern "C" void batch_load_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - hipDoubleComplex const* data, - hipDoubleComplex* fft_buffer, + acc_complex_double_t const* data, + acc_complex_double_t* fft_buffer, int stream_id__) { dim3 grid_t(64); dim3 grid_b(num_blocks(num_pw_components, grid_t.x), num_fft); - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); - hipMemsetAsync(fft_buffer, 0, fft_size * num_fft * sizeof(hipDoubleComplex), stream); + acc::zero(fft_buffer, fft_size*num_fft, stream_id(stream_id__)); - hipLaunchKernelGGL((batch_load_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((batch_load_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, fft_size, num_pw_components, map, @@ -132,8 +130,8 @@ extern "C" void batch_load_gpu(int fft_size, __global__ void batch_unload_gpu_kernel(int fft_size, int num_pw_components, int const* map, - hipDoubleComplex const* fft_buffer, - hipDoubleComplex* data, + acc_complex_double_t const* fft_buffer, + acc_complex_double_t* data, double alpha, double beta) { @@ -141,9 +139,9 @@ __global__ void batch_unload_gpu_kernel(int fft_size, int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < num_pw_components) { - hipDoubleComplex z1 = data[array2D_offset(idx, i, num_pw_components)]; - hipDoubleComplex z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; - data[array2D_offset(idx, i, num_pw_components)] = make_hipDoubleComplex(alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y); + acc_complex_double_t z1 = data[array2D_offset(idx, i, 
num_pw_components)]; + acc_complex_double_t z2 = fft_buffer[array2D_offset(map[idx], i, fft_size)]; + data[array2D_offset(idx, i, num_pw_components)] = make_accDoubleComplex(alpha * z1.x + beta * z2.x, alpha * z1.y + beta * z2.y); //data[array2D_offset(idx, i, num_pw_components)] = cuCadd( // cuCmul(make_cuDoubleComplex(alpha, 0), data[array2D_offset(idx, i, num_pw_components)]), @@ -158,8 +156,8 @@ extern "C" void batch_unload_gpu(int fft_size, int num_pw_components, int num_fft, int const* map, - hipDoubleComplex const* fft_buffer, - hipDoubleComplex* data, + acc_complex_double_t const* fft_buffer, + acc_complex_double_t* data, double alpha, double beta, int stream_id__) @@ -167,13 +165,13 @@ extern "C" void batch_unload_gpu(int fft_size, dim3 grid_t(64); dim3 grid_b(num_blocks(num_pw_components, grid_t.x), num_fft); - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); if (alpha == 0) { - hipMemsetAsync(data, 0, num_pw_components * sizeof(hipDoubleComplex), stream); + acc::zero(data, num_pw_components, stream_id(stream_id__)); } - hipLaunchKernelGGL((batch_unload_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((batch_unload_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, fft_size, num_pw_components, map, @@ -186,29 +184,29 @@ extern "C" void batch_unload_gpu(int fft_size, __global__ void load_x0y0_col_gpu_kernel(int z_col_size, int const* map, - hipDoubleComplex const* data, - hipDoubleComplex* fft_buffer) + acc_complex_double_t const* data, + acc_complex_double_t* fft_buffer) { int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; if (idx < z_col_size) { - fft_buffer[map[idx]] = make_hipDoubleComplex(data[idx].x, -data[idx].y); + fft_buffer[map[idx]] = make_accDoubleComplex(data[idx].x, -data[idx].y); } } extern "C" void load_x0y0_col_gpu(int z_col_size, int const* map, - hipDoubleComplex const* data, - hipDoubleComplex* fft_buffer, + 
acc_complex_double_t const* data, + acc_complex_double_t* fft_buffer, int stream_id__) { dim3 grid_t(64); dim3 grid_b(num_blocks(z_col_size, grid_t.x)); - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); - hipLaunchKernelGGL((load_x0y0_col_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((load_x0y0_col_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream, z_col_size, map, data, @@ -217,8 +215,8 @@ extern "C" void load_x0y0_col_gpu(int z_col_size, } template -__global__ void pack_unpack_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packed__, - hipDoubleComplex* fft_buf__, +__global__ void pack_unpack_z_cols_gpu_kernel(acc_complex_double_t* z_cols_packed__, + acc_complex_double_t* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -246,8 +244,8 @@ __global__ void pack_unpack_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packed__, } } -extern "C" void unpack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, - hipDoubleComplex* fft_buf__, +extern "C" void unpack_z_cols_gpu(acc_complex_double_t* z_cols_packed__, + acc_complex_double_t* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -256,14 +254,14 @@ extern "C" void unpack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, bool use_reduction__, int stream_id__) { - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(hipDoubleComplex), stream); + acc::zero(fft_buf__, size_x__ * size_y__ * size_z__, stream_id(stream_id__)); - hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed__, fft_buf__, size_x__, @@ 
-273,7 +271,7 @@ extern "C" void unpack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, z_col_pos__ ); if (use_reduction__) { - hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, &z_cols_packed__[size_z__], // skip first column for {-x, -y} coordinates fft_buf__, size_x__, @@ -285,8 +283,8 @@ extern "C" void unpack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, } } -extern "C" void pack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, - hipDoubleComplex* fft_buf__, +extern "C" void pack_z_cols_gpu(acc_complex_double_t* z_cols_packed__, + acc_complex_double_t* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -294,12 +292,12 @@ extern "C" void pack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, int const* z_col_pos__, int stream_id__) { - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipLaunchKernelGGL((pack_unpack_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed__, fft_buf__, size_x__, @@ -311,9 +309,9 @@ extern "C" void pack_z_cols_gpu(hipDoubleComplex* z_cols_packed__, } template -__global__ void pack_unpack_two_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packed1__, - hipDoubleComplex* z_cols_packed2__, - hipDoubleComplex* fft_buf__, +__global__ void pack_unpack_two_z_cols_gpu_kernel(acc_complex_double_t* z_cols_packed1__, + acc_complex_double_t* z_cols_packed2__, + acc_complex_double_t* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -327,32 +325,32 @@ __global__ void pack_unpack_two_z_cols_gpu_kernel(hipDoubleComplex* z_cols_packe /* load into buffer */ if (direction == 1) { int ipos = 
z_col_pos__[icol]; - hipDoubleComplex z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; - hipDoubleComplex z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; + acc_complex_double_t z1 = z_cols_packed1__[array2D_offset(iz, icol, size_z__)]; + acc_complex_double_t z2 = z_cols_packed2__[array2D_offset(iz, icol, size_z__)]; if (conjugate) { /* conj(z1) + I * conj(z2) */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_hipDoubleComplex(z1.x + z2.y, z2.x - z1.y); + fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_accDoubleComplex(z1.x + z2.y, z2.x - z1.y); } else { /* z1 + I * z2 */ - fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_hipDoubleComplex(z1.x - z2.y, z1.y + z2.x); + fft_buf__[array2D_offset(ipos, iz, size_xy)] = make_accDoubleComplex(z1.x - z2.y, z1.y + z2.x); } } if (direction == -1) { int ipos1 = z_col_pos__[icol]; int ipos2 = z_col_pos__[num_z_cols__ + icol]; - hipDoubleComplex z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; - hipDoubleComplex z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; + acc_complex_double_t z1 = fft_buf__[array2D_offset(ipos1, iz, size_xy)]; + acc_complex_double_t z2 = fft_buf__[array2D_offset(ipos2, iz, size_xy)]; - z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = make_hipDoubleComplex(0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)); - z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = make_hipDoubleComplex(0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)); + z_cols_packed1__[array2D_offset(iz, icol, size_z__)] = make_accDoubleComplex(0.5 * (z1.x + z2.x), 0.5 * (z1.y - z2.y)); + z_cols_packed2__[array2D_offset(iz, icol, size_z__)] = make_accDoubleComplex(0.5 * (z1.y + z2.y), 0.5 * (z2.x - z1.x)); } } } -extern "C" void unpack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, - hipDoubleComplex* z_cols_packed2__, - hipDoubleComplex* fft_buf__, +extern "C" void unpack_z_cols_2_gpu(acc_complex_double_t* z_cols_packed1__, + acc_complex_double_t* z_cols_packed2__, + acc_complex_double_t* fft_buf__, int 
size_x__, int size_y__, int size_z__, @@ -360,14 +358,14 @@ extern "C" void unpack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, int const* z_col_pos__, int stream_id__) { - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = (acc_stream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipMemsetAsync(fft_buf__, 0, size_x__ * size_y__ * size_z__ * sizeof(hipDoubleComplex), stream); + acc::zero(fft_buf__, size_x__ * size_y__ * size_z__, stream_id(stream_id__)); - hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_two_z_cols_gpu_kernel<1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed1__, z_cols_packed2__, fft_buf__, @@ -377,7 +375,7 @@ extern "C" void unpack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, num_z_cols__, z_col_pos__ ); - hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_two_z_cols_gpu_kernel<1, true>), dim3(grid_b), dim3(grid_t), 0, stream, &z_cols_packed1__[size_z__], // skip first column for {-x, -y} coordinates &z_cols_packed2__[size_z__], // skip first column for {-x, -y} coordinates fft_buf__, @@ -389,9 +387,9 @@ extern "C" void unpack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, ); } -extern "C" void pack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, - hipDoubleComplex* z_cols_packed2__, - hipDoubleComplex* fft_buf__, +extern "C" void pack_z_cols_2_gpu(acc_complex_double_t* z_cols_packed1__, + acc_complex_double_t* z_cols_packed2__, + acc_complex_double_t* fft_buf__, int size_x__, int size_y__, int size_z__, @@ -399,12 +397,12 @@ extern "C" void pack_z_cols_2_gpu(hipDoubleComplex* z_cols_packed1__, int const* z_col_pos__, int stream_id__) { - hipStream_t stream = (hipStream_t) acc::stream(stream_id(stream_id__)); + acc_stream_t stream = 
(acc_stream_t) acc::stream(stream_id(stream_id__)); dim3 grid_t(64); dim3 grid_b(num_blocks(num_z_cols__, grid_t.x), size_z__); - hipLaunchKernelGGL((pack_unpack_two_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, + accLaunchKernel((pack_unpack_two_z_cols_gpu_kernel<-1, false>), dim3(grid_b), dim3(grid_t), 0, stream, z_cols_packed1__, z_cols_packed2__, fft_buf__, diff --git a/src/SDDK/GPU/gpublas_interface.hpp b/src/SDDK/GPU/gpublas_interface.hpp index fb2dbaf3d..a0f9579bf 100644 --- a/src/SDDK/GPU/gpublas_interface.hpp +++ b/src/SDDK/GPU/gpublas_interface.hpp @@ -25,7 +25,6 @@ #ifndef __GPUBLAS_INTERFACE_HPP__ #define __GPUBLAS_INTERFACE_HPP__ -#include #if defined(__GPU) && defined(__CUDA) #include "cublas.hpp" diff --git a/src/SDDK/GPU/scale_matrix.cu b/src/SDDK/GPU/scale_matrix.cu index 043c747f3..9324151c9 100644 --- a/src/SDDK/GPU/scale_matrix.cu +++ b/src/SDDK/GPU/scale_matrix.cu @@ -22,13 +22,12 @@ * \brief Contains implementaiton of CUDA kernels to scale matrix elements (rows or columns). 
*/ #include "cuda_common.hpp" -#include -#include +#include "acc_runtime.hpp" __global__ void scale_matrix_columns_gpu_kernel ( int nrow, - hipDoubleComplex* mtrx, + acc_complex_double_t* mtrx, double* a ) { @@ -37,20 +36,20 @@ __global__ void scale_matrix_columns_gpu_kernel if (irow < nrow) { mtrx[array2D_offset(irow, icol, nrow)] = - hipCmul(mtrx[array2D_offset(irow, icol, nrow)], make_hipDoubleComplex(a[icol], 0)); + accCmul(mtrx[array2D_offset(irow, icol, nrow)], make_accDoubleComplex(a[icol], 0)); } } // scale each column of the matrix by a column-dependent constant extern "C" void scale_matrix_columns_gpu(int nrow, int ncol, - hipDoubleComplex* mtrx, + acc_complex_double_t* mtrx, double* a) { dim3 grid_t(64); dim3 grid_b(num_blocks(nrow, grid_t.x), ncol); - hipLaunchKernelGGL((scale_matrix_columns_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((scale_matrix_columns_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, nrow, mtrx, a @@ -60,28 +59,28 @@ extern "C" void scale_matrix_columns_gpu(int nrow, __global__ void scale_matrix_rows_gpu_kernel ( int nrow__, - hipDoubleComplex* mtrx__, + acc_complex_double_t* mtrx__, double const* v__ ) { int icol = blockIdx.y; int irow = blockDim.x * blockIdx.x + threadIdx.x; if (irow < nrow__) { - hipDoubleComplex z = mtrx__[array2D_offset(irow, icol, nrow__)]; - mtrx__[array2D_offset(irow, icol, nrow__)] = make_hipDoubleComplex(z.x * v__[irow], z.y * v__[irow]); + acc_complex_double_t z = mtrx__[array2D_offset(irow, icol, nrow__)]; + mtrx__[array2D_offset(irow, icol, nrow__)] = make_accDoubleComplex(z.x * v__[irow], z.y * v__[irow]); } } // scale each row of the matrix by a row-dependent constant extern "C" void scale_matrix_rows_gpu(int nrow__, int ncol__, - hipDoubleComplex* mtrx__, + acc_complex_double_t* mtrx__, double const* v__) { dim3 grid_t(256); dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__); - hipLaunchKernelGGL((scale_matrix_rows_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + 
accLaunchKernel((scale_matrix_rows_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, nrow__, mtrx__, v__ @@ -90,7 +89,7 @@ extern "C" void scale_matrix_rows_gpu(int nrow__, __global__ void scale_matrix_elements_gpu_kernel ( - hipDoubleComplex* mtrx__, + acc_complex_double_t* mtrx__, int ld__, int nrow__, double beta__ @@ -99,12 +98,12 @@ __global__ void scale_matrix_elements_gpu_kernel int icol = blockIdx.y; int irow = blockDim.x * blockIdx.x + threadIdx.x; if (irow < nrow__) { - hipDoubleComplex z = mtrx__[array2D_offset(irow, icol, ld__)]; - mtrx__[array2D_offset(irow, icol, ld__)] = make_hipDoubleComplex(z.x * beta__, z.y * beta__); + acc_complex_double_t z = mtrx__[array2D_offset(irow, icol, ld__)]; + mtrx__[array2D_offset(irow, icol, ld__)] = make_accDoubleComplex(z.x * beta__, z.y * beta__); } } -extern "C" void scale_matrix_elements_gpu(hipDoubleComplex* ptr__, +extern "C" void scale_matrix_elements_gpu(acc_complex_double_t* ptr__, int ld__, int nrow__, int ncol__, @@ -113,7 +112,7 @@ extern "C" void scale_matrix_elements_gpu(hipDoubleComplex* ptr__, dim3 grid_t(64); dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__); - hipLaunchKernelGGL((scale_matrix_elements_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, + accLaunchKernel((scale_matrix_elements_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0, ptr__, ld__, nrow__, diff --git a/src/SDDK/fft3d.hpp b/src/SDDK/fft3d.hpp index bba8d09c1..7b1c7c835 100644 --- a/src/SDDK/fft3d.hpp +++ b/src/SDDK/fft3d.hpp @@ -742,8 +742,6 @@ class FFT3D : public FFT3D_grid /* create plan for xy transform */ acc_fft_plan_xy_ = gpufft::create_batch_plan(2, dim_xy, dim_xy, 1, size(0) * size(1), local_size_z(), auto_alloc); - /* in CUDA case this is an alias */ - acc_fft_plan_xy_ = acc_fft_plan_xy_; /* stream #0 will execute FFTs */ gpufft::set_stream(acc_fft_plan_xy_, stream_id(acc_fft_stream_id_)); /* allocate arrays with z- offsets and sizes on the host and device*/ diff --git a/src/SDDK/linalg.hpp b/src/SDDK/linalg.hpp index 
4d0245827..bf4f019a5 100644 --- a/src/SDDK/linalg.hpp +++ b/src/SDDK/linalg.hpp @@ -131,10 +131,10 @@ inline void linalg2::gemm(char transa, char transb, ftn_int } case linalg_t::gpublas: { #ifdef __GPU - gpublas::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), - ldb, reinterpret_cast(beta), - reinterpret_cast(C), ldc, sid()); + gpublas::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), + ldb, reinterpret_cast(beta), + reinterpret_cast(C), ldc, sid()); #else throw std::runtime_error("not compiled with GPU blas support!"); #endif @@ -143,11 +143,11 @@ inline void linalg2::gemm(char transa, char transb, ftn_int } case linalg_t::cublasxt: { #if defined(__GPU) && defined(__CUDA) - gpublas::xt::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(beta), - reinterpret_cast(C), ldc); + gpublas::xt::zgemm(transa, transb, m, n, k, reinterpret_cast(alpha), + reinterpret_cast(A), lda, + reinterpret_cast(B), ldb, + reinterpret_cast(beta), + reinterpret_cast(C), ldc); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -236,8 +236,8 @@ inline void linalg2::trmm(char side, char uplo, char transa, } case linalg_t::gpublas: { #ifdef __GPU - gpublas::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), ldb); + gpublas::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), ldb); #else throw std::runtime_error("not compiled with GPU blas support!"); #endif @@ -245,8 +245,8 @@ inline void linalg2::trmm(char side, char uplo, char transa, } case linalg_t::cublasxt: { #if defined(__GPU) && defined(__CUDA) - gpublas::xt::ztrmm(side, uplo, transa, 'N', m, n, reinterpret_cast(alpha), - reinterpret_cast(A), lda, reinterpret_cast(B), ldb); + gpublas::xt::ztrmm(side, uplo, 
transa, 'N', m, n, reinterpret_cast(alpha), + reinterpret_cast(A), lda, reinterpret_cast(B), ldb); #else throw std::runtime_error("not compiled with cublasxt"); #endif @@ -1184,7 +1184,7 @@ inline void linalg::gemv(int trans__, ftn_int m, ftn_in int stream_id) { const char trans[] = {'N', 'T', 'C'}; - gpublas::zgemv(trans[trans__], m, n, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx, (hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stream_id); + gpublas::zgemv(trans[trans__], m, n, (acc_complex_double_t*)alpha, (acc_complex_double_t*)A, lda, (acc_complex_double_t*)x, incx, (acc_complex_double_t*)beta, (acc_complex_double_t*)y, incy, stream_id); } // Generic interface to zgemm @@ -1201,7 +1201,7 @@ inline void linalg::gemm(int transa__, int transb__, ft assert(n > 0); assert(k > 0); const char trans[] = {'N', 'T', 'C'}; - gpublas::zgemm(trans[transa__], trans[transb__], m, n, k, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, stream_id); + gpublas::zgemm(trans[transa__], trans[transb__], m, n, k, (acc_complex_double_t*)alpha, (acc_complex_double_t*)A, lda, (acc_complex_double_t*)B, ldb, (acc_complex_double_t*)beta, (acc_complex_double_t*)C, ldc, stream_id); } // Generic interface to dgemm @@ -1276,7 +1276,8 @@ inline void linalg::trmm(char side, ftn_double_complex* B, ftn_int ldb) { - gpublas::ztrmm(side, uplo, transa, 'N', m, n, (hipDoubleComplex*)alpha, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb); + gpublas::ztrmm(side, uplo, transa, 'N', m, n, (acc_complex_double_t*)alpha, (acc_complex_double_t*)A, lda, + (acc_complex_double_t*)B, ldb); } template <> @@ -1287,7 +1288,8 @@ inline void linalg::axpy(ftn_int n__, ftn_double_complex* y__, ftn_int incy__) { - gpublas::zaxpy(n__, (hipDoubleComplex const*)alpha__, (hipDoubleComplex*)x__, incx__, (hipDoubleComplex*)y__, incy__); + gpublas::zaxpy(n__, (acc_complex_double_t const*)alpha__, 
(acc_complex_double_t*)x__, incx__, + (acc_complex_double_t*)y__, incy__); } #endif // __GPU diff --git a/src/SDDK/matrix_storage.hpp b/src/SDDK/matrix_storage.hpp index 9363fe050..2077efba8 100644 --- a/src/SDDK/matrix_storage.hpp +++ b/src/SDDK/matrix_storage.hpp @@ -447,7 +447,8 @@ class matrix_storage } } else { #if defined(__GPU) - scale_matrix_elements_gpu(prime().at(mem__, 0, i0__), prime().ld(), num_rows_loc(), n__, beta__); + scale_matrix_elements_gpu((acc_complex_double_t*)prime().at(mem__, 0, i0__), prime().ld(), num_rows_loc(), + n__, beta__); #endif } } diff --git a/src/utils/amd_hip/hip/channel_descriptor.h b/src/utils/amd_hip/hip/channel_descriptor.h deleted file mode 100644 index 842701bad..000000000 --- a/src/utils/amd_hip/hip/channel_descriptor.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H -#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H - -// Some standard header files, these are included by hc.hpp and so want to make them avail on both -// paths to provide a consistent include env and avoid "missing symbol" errors that only appears -// on NVCC path: - - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/device_functions.h b/src/utils/amd_hip/hip/device_functions.h deleted file mode 100644 index f6059f202..000000000 --- a/src/utils/amd_hip/hip/device_functions.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H -#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/driver_types.h b/src/utils/amd_hip/hip/driver_types.h deleted file mode 100644 index d428ec7f2..000000000 --- a/src/utils/amd_hip/hip/driver_types.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H -#define HIP_INCLUDE_HIP_DRIVER_TYPES_H - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include "driver_types.h" -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h b/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h deleted file mode 100644 index de290fafc..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/channel_descriptor.h +++ /dev/null @@ -1,346 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H - -#include -#include - -#ifdef __cplusplus - -hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); - -static inline hipChannelFormatDesc hipCreateChannelDescHalf() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); -} - -static inline hipChannelFormatDesc hipCreateChannelDescHalf1() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); -} - -static inline hipChannelFormatDesc hipCreateChannelDescHalf2() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); -} - -template -static inline hipChannelFormatDesc hipCreateChannelDesc() { - return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(char) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed char) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned char) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned char) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed char) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned char) * 8; - return 
hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed char) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); -} - -#ifndef __GNUC__ // vector3 is the same as vector4 -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned char) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed char) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); -} -#endif - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned char) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed char) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed short) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline 
hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed short) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); -} - -#ifndef __GNUC__ -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed short) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); -} -#endif - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned short) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed short) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned int) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed int) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned int) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed int) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned int) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed int) * 8; - return hipCreateChannelDesc(e, e, 0, 
0, hipChannelFormatKindSigned); -} - -#ifndef __GNUC__ -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned int) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed int) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); -} -#endif - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned int) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed int) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(float) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(float) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(float) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat); -} - -#ifndef __GNUC__ -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(float) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat); -} -#endif - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(float) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned long) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed long) * 8; 
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned long) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed long) * 8; - return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned long) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed long) * 8; - return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); -} - -#ifndef __GNUC__ -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned long) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed long) * 8; - return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); -} -#endif - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(unsigned long) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); -} - -template <> -inline hipChannelFormatDesc hipCreateChannelDesc() { - int e = (int)sizeof(signed long) * 8; - return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); -} - -#else - -struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, - enum hipChannelFormatKind f); - -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp b/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp deleted file mode 100644 index 7b97503c1..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/code_object_bundle.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/* 
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace hip_impl { -hsa_isa_t triple_to_hsa_isa(const std::string& triple); - -struct Bundled_code { - union Header { - struct { - std::uint64_t offset; - std::uint64_t bundle_sz; - std::uint64_t triple_sz; - }; - char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)]; - } header; - std::string triple; - std::vector blob; -}; - -class Bundled_code_header { - // DATA - STATICS - static constexpr const char magic_string_[] = "__CLANG_OFFLOAD_BUNDLE__"; - static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1; - - // DATA - union Header_ { - struct { - char bundler_magic_string_[magic_string_sz_]; - std::uint64_t bundle_cnt_; - }; - char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)]; - } header_; - std::vector bundles_; - - // FRIENDS - MANIPULATORS - template - friend inline bool read(RandomAccessIterator f, RandomAccessIterator l, - Bundled_code_header& x) { - if (f == l) return false; - - std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_); - - if (valid(x)) { - x.bundles_.resize(x.header_.bundle_cnt_); - - auto it = f + sizeof(x.header_.cbuf_); - for (auto&& y : x.bundles_) { - std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf); - it += sizeof(y.header.cbuf); - - y.triple.assign(it, it + y.header.triple_sz); - - std::copy_n(f + y.header.offset, y.header.bundle_sz, std::back_inserter(y.blob)); - - it += y.header.triple_sz; - - x.bundled_code_size = std::max(x.bundled_code_size, - y.header.offset + y.header.bundle_sz); - } - - return true; - } - - return false; - } - friend inline bool read(const std::vector& blob, Bundled_code_header& x) { - return read(blob.cbegin(), blob.cend(), x); - } - friend inline bool read(std::istream& is, Bundled_code_header& x) { - return read( - std::vector{std::istreambuf_iterator{is}, std::istreambuf_iterator{}}, - x); - } - - // FRIENDS - ACCESSORS - friend inline bool valid(const 
Bundled_code_header& x) { - return std::equal(magic_string_, magic_string_ + magic_string_sz_, - x.header_.bundler_magic_string_); - } - friend inline const std::vector& bundles(const Bundled_code_header& x) { - return x.bundles_; - } - - public: - // CREATORS - Bundled_code_header() = default; - template - Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l); - explicit Bundled_code_header(const std::vector& blob); - explicit Bundled_code_header(const void* maybe_blob); - Bundled_code_header(const Bundled_code_header&) = default; - Bundled_code_header(Bundled_code_header&&) = default; - ~Bundled_code_header() = default; - - // MANIPULATORS - Bundled_code_header& operator=(const Bundled_code_header&) = default; - Bundled_code_header& operator=(Bundled_code_header&&) = default; - - size_t bundled_code_size = 0; -}; - -// CREATORS -template -Bundled_code_header::Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l) - : Bundled_code_header{} { - read(f, l, *this); -} -} // Namespace hip_impl. diff --git a/src/utils/amd_hip/hip/hcc_detail/concepts.hpp b/src/utils/amd_hip/hip/hcc_detail/concepts.hpp deleted file mode 100644 index 373cefb29..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/concepts.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -namespace hip_impl // Documentation only. -{ -#define requires(...) - -#define FunctionalProcedure typename -} // namespace hip_impl diff --git a/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h b/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h deleted file mode 100644 index 8b1378917..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/cuda/cuda.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h b/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h deleted file mode 100644 index 8b1378917..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/cuda/math_functions.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/utils/amd_hip/hip/hcc_detail/device_functions.h b/src/utils/amd_hip/hip/hcc_detail/device_functions.h deleted file mode 100644 index 602c6be87..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/device_functions.h +++ /dev/null @@ -1,1078 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H - -#include "host_defines.h" -#include "math_fwd.h" - -#include -#include - - -#include -#include -#include -/* -Integer Intrinsics -*/ - -// integer intrinsic function __poc __clz __ffs __brev -__device__ static inline unsigned int __popc(unsigned int input) { - return __builtin_popcount(input); -} -__device__ static inline unsigned int __popcll(unsigned long long int input) { - return __builtin_popcountl(input); -} - -__device__ static inline int __clz(int input) { - return __ockl_clz_u32((uint)input); -} - -__device__ static inline int __clzll(long long int input) { - return __ockl_clz_u64((ulong)input); -} - -__device__ static inline unsigned int __ffs(unsigned int input) { - return ( input == 0 ? 
-1 : __builtin_ctz(input) ) + 1; -} - -__device__ static inline unsigned int __ffsll(unsigned long long int input) { - return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; -} - -__device__ static inline unsigned int __ffs(int input) { - return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; -} - -__device__ static inline unsigned int __ffsll(long long int input) { - return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; -} - -__device__ static inline unsigned int __brev(unsigned int input) { - return __llvm_bitrev_b32(input); -} - -__device__ static inline unsigned long long int __brevll(unsigned long long int input) { - return __llvm_bitrev_b64(input); -} - -__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { - return input == 0 ? -1 : __builtin_ctzl(input); -} - -__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { - uint32_t offset = src1 & 31; - uint32_t width = src2 & 31; - return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width); -} - -__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { - uint64_t offset = src1 & 63; - uint64_t width = src2 & 63; - return width == 0 ? 
0 : (src0 << (64 - offset - width)) >> (64 - width); -} - -__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { - uint32_t offset = src2 & 31; - uint32_t width = src3 & 31; - uint32_t mask = (1 << width) - 1; - return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); -} - -__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) { - uint64_t offset = src2 & 63; - uint64_t width = src3 & 63; - uint64_t mask = (1 << width) - 1; - return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); -} - -__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); -__device__ static unsigned int __hadd(int x, int y); -__device__ static int __mul24(int x, int y); -__device__ static long long int __mul64hi(long long int x, long long int y); -__device__ static int __mulhi(int x, int y); -__device__ static int __rhadd(int x, int y); -__device__ static unsigned int __sad(int x, int y, int z); -__device__ static unsigned int __uhadd(unsigned int x, unsigned int y); -__device__ static int __umul24(unsigned int x, unsigned int y); -__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); -__device__ static unsigned int __umulhi(unsigned int x, unsigned int y); -__device__ static unsigned int __urhadd(unsigned int x, unsigned int y); -__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); - -struct ucharHolder { - union { - unsigned char c[4]; - unsigned int ui; - }; -} __attribute__((aligned(4))); - -struct uchar2Holder { - union { - unsigned int ui[2]; - unsigned char c[8]; - }; -} __attribute__((aligned(8))); - -__device__ -static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { - struct uchar2Holder cHoldVal; - struct ucharHolder cHoldKey; - struct ucharHolder cHoldOut; - cHoldKey.ui = s; - 
cHoldVal.ui[0] = x; - cHoldVal.ui[1] = y; - cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]]; - cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]]; - cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]]; - cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]]; - return cHoldOut.ui; -} - -__device__ static inline unsigned int __hadd(int x, int y) { - int z = x + y; - int sign = z & 0x8000000; - int value = z & 0x7FFFFFFF; - return ((value) >> 1 || sign); -} - -__device__ static inline int __mul24(int x, int y) { - return __ockl_mul24_i32(x, y); -} - -__device__ static inline long long __mul64hi(long long int x, long long int y) { - ulong x0 = (ulong)x & 0xffffffffUL; - long x1 = x >> 32; - ulong y0 = (ulong)y & 0xffffffffUL; - long y1 = y >> 32; - ulong z0 = x0*y0; - long t = x1*y0 + (z0 >> 32); - long z1 = t & 0xffffffffL; - long z2 = t >> 32; - z1 = x0*y1 + z1; - return x1*y1 + z2 + (z1 >> 32); -} - -__device__ static inline int __mulhi(int x, int y) { - return __ockl_mul_hi_i32(x, y); -} - -__device__ static inline int __rhadd(int x, int y) { - int z = x + y + 1; - int sign = z & 0x8000000; - int value = z & 0x7FFFFFFF; - return ((value) >> 1 || sign); -} -__device__ static inline unsigned int __sad(int x, int y, int z) { - return x > y ? 
x - y + z : y - x + z; -} -__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { - return (x + y) >> 1; -} -__device__ static inline int __umul24(unsigned int x, unsigned int y) { - return __ockl_mul24_u32(x, y); -} - -__device__ -static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { - ulong x0 = x & 0xffffffffUL; - ulong x1 = x >> 32; - ulong y0 = y & 0xffffffffUL; - ulong y1 = y >> 32; - ulong z0 = x0*y0; - ulong t = x1*y0 + (z0 >> 32); - ulong z1 = t & 0xffffffffUL; - ulong z2 = t >> 32; - z1 = x0*y1 + z1; - return x1*y1 + z2 + (z1 >> 32); -} - -__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { - return __ockl_mul_hi_u32(x, y); -} -__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { - return (x + y + 1) >> 1; -} -__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { - return __ockl_sad_u32(x, y, z); -} - -__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } - -/* -HIP specific device functions -*/ - -// utility union type -union __u { - int i; - unsigned int u; - float f; -}; - -__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { - __u tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); - return tmp.u; -} - -__device__ static inline float __hip_ds_bpermutef(int index, float src) { - __u tmp; tmp.f = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); - return tmp.f; -} - -__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { - __u tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); - return tmp.u; -} - -__device__ static inline float __hip_ds_permutef(int index, float src) { - __u tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); - return tmp.u; -} - -__device__ static inline unsigned __hip_ds_swizzle(unsigned int src, int pattern) { - __u tmp; 
tmp.u = src; - tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); - return tmp.u; -} -__device__ static inline float __hip_ds_swizzlef(float src, int pattern) { - __u tmp; tmp.f = src; - tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); - return tmp.f; -} - -__device__ static inline int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, - int bank_mask, bool bound_ctrl) { - return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); -} - -static constexpr int warpSize = 64; - - __device__ -inline -int __shfl(int var, int src_lane, int width = warpSize) { - int self = __lane_id(); - int index = src_lane + (self & ~(width-1)); - return __llvm_amdgcn_ds_bpermute(index<<2, var); -} -__device__ -inline -unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) { - __u tmp; tmp.u = var; - tmp.i = __shfl(tmp.i, src_lane, width); - return tmp.u; -} -__device__ -inline -float __shfl(float var, int src_lane, int width = warpSize) { - __u tmp; tmp.f = var; - tmp.i = __shfl(tmp.i, src_lane, width); - return tmp.f; -} -__device__ -inline -double __shfl(double var, int src_lane, int width = warpSize) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); - tmp[0] = __shfl(tmp[0], src_lane, width); - tmp[1] = __shfl(tmp[1], src_lane, width); - - uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); - double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - return tmp1; -} - - __device__ -inline -int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { - int self = __lane_id(); - int index = self - lane_delta; - index = (index < (self & ~(width-1)))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); -} -__device__ -inline -unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) { - __u tmp; tmp.u = var; - tmp.i = __shfl_up(tmp.i, lane_delta, width); - 
return tmp.u; -} -__device__ -inline -float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) { - __u tmp; tmp.f = var; - tmp.i = __shfl_up(tmp.i, lane_delta, width); - return tmp.f; -} -__device__ -inline -double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); - tmp[0] = __shfl_up(tmp[0], lane_delta, width); - tmp[1] = __shfl_up(tmp[1], lane_delta, width); - - uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); - double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - return tmp1; -} - -__device__ -inline -int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { - int self = __lane_id(); - int index = self + lane_delta; - index = (int)((self&(width-1))+lane_delta) >= width?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); -} -__device__ -inline -unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) { - __u tmp; tmp.u = var; - tmp.i = __shfl_down(tmp.i, lane_delta, width); - return tmp.u; -} -__device__ -inline -float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) { - __u tmp; tmp.f = var; - tmp.i = __shfl_down(tmp.i, lane_delta, width); - return tmp.f; -} -__device__ -inline -double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); - tmp[0] = __shfl_down(tmp[0], lane_delta, width); - tmp[1] = __shfl_down(tmp[1], lane_delta, width); - - uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); - double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - return tmp1; -} - -__device__ -inline -int __shfl_xor(int var, int lane_mask, int width = 
warpSize) { - int self = __lane_id(); - int index = self^lane_mask; - index = index >= ((self+width)&~(width-1))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); -} -__device__ -inline -unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) { - __u tmp; tmp.u = var; - tmp.i = __shfl_xor(tmp.i, lane_mask, width); - return tmp.u; -} -__device__ -inline -float __shfl_xor(float var, int lane_mask, int width = warpSize) { - __u tmp; tmp.f = var; - tmp.i = __shfl_xor(tmp.i, lane_mask, width); - return tmp.f; -} -__device__ -inline -double __shfl_xor(double var, int lane_mask, int width = warpSize) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); - tmp[0] = __shfl_xor(tmp[0], lane_mask, width); - tmp[1] = __shfl_xor(tmp[1], lane_mask, width); - - uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); - double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - return tmp1; -} - -#define MASK1 0x00ff00ff -#define MASK2 0xff00ff00 - -__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.w & MASK1; - unsigned one2 = in2.w & MASK1; - out.w = (one1 + one2) & MASK1; - one1 = in1.w & MASK2; - one2 = in2.w & MASK2; - out.w = out.w | ((one1 + one2) & MASK2); - return out; -} - -__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.w & MASK1; - unsigned one2 = in2.w & MASK1; - out.w = (one1 - one2) & MASK1; - one1 = in1.w & MASK2; - one2 = in2.w & MASK2; - out.w = out.w | ((one1 - one2) & MASK2); - return out; -} - -__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.w & MASK1; - unsigned one2 = in2.w & MASK1; - out.w = (one1 * one2) & MASK1; - one1 = in1.w & MASK2; - one2 = in2.w & MASK2; - out.w = out.w | ((one1 * one2) & MASK2); - return out; -} - 
-/* - * Rounding modes are not yet supported in HIP - * TODO: Conversion functions are not correct, need to fix when BE is ready -*/ - -__device__ static inline float __double2float_rd(double x) { return (double)x; } -__device__ static inline float __double2float_rn(double x) { return (double)x; } -__device__ static inline float __double2float_ru(double x) { return (double)x; } -__device__ static inline float __double2float_rz(double x) { return (double)x; } - -__device__ static inline int __double2hiint(double x) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - - int tmp[2]; - __builtin_memcpy(tmp, &x, sizeof(tmp)); - - return tmp[1]; -} -__device__ static inline int __double2loint(double x) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - - int tmp[2]; - __builtin_memcpy(tmp, &x, sizeof(tmp)); - - return tmp[0]; -} - -__device__ static inline int __double2int_rd(double x) { return (int)x; } -__device__ static inline int __double2int_rn(double x) { return (int)x; } -__device__ static inline int __double2int_ru(double x) { return (int)x; } -__device__ static inline int __double2int_rz(double x) { return (int)x; } - -__device__ static inline long long int __double2ll_rd(double x) { return (long long int)x; } -__device__ static inline long long int __double2ll_rn(double x) { return (long long int)x; } -__device__ static inline long long int __double2ll_ru(double x) { return (long long int)x; } -__device__ static inline long long int __double2ll_rz(double x) { return (long long int)x; } - -__device__ static inline unsigned int __double2uint_rd(double x) { return (unsigned int)x; } -__device__ static inline unsigned int __double2uint_rn(double x) { return (unsigned int)x; } -__device__ static inline unsigned int __double2uint_ru(double x) { return (unsigned int)x; } -__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; } - -__device__ static inline unsigned long long int __double2ull_rd(double x) { - return 
(unsigned long long int)x; -} -__device__ static inline unsigned long long int __double2ull_rn(double x) { - return (unsigned long long int)x; -} -__device__ static inline unsigned long long int __double2ull_ru(double x) { - return (unsigned long long int)x; -} -__device__ static inline unsigned long long int __double2ull_rz(double x) { - return (unsigned long long int)x; -} - -__device__ static inline long long int __double_as_longlong(double x) { - static_assert(sizeof(long long) == sizeof(double), ""); - - long long tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -/* -__device__ unsigned short __float2half_rn(float x); -__device__ float __half2float(unsigned short); - -The above device function are not a valid . -Use -__device__ __half __float2half_rn(float x); -__device__ float __half2float(__half); -from hip_fp16.h - -CUDA implements half as unsigned short whereas, HIP doesn't. - -*/ - -__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); } -__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); } -__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); } -__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); } - -__device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; } -__device__ static inline long long int __float2ll_rn(float x) { return (long long int)x; } -__device__ static inline long long int __float2ll_ru(float x) { return (long long int)x; } -__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; } - -__device__ static inline unsigned int __float2uint_rd(float x) { return (unsigned int)x; } -__device__ static inline unsigned int __float2uint_rn(float x) { return (unsigned int)x; } -__device__ static inline unsigned int __float2uint_ru(float x) { return (unsigned int)x; } -__device__ static inline unsigned int __float2uint_rz(float x) 
{ return (unsigned int)x; } - -__device__ static inline unsigned long long int __float2ull_rd(float x) { - return (unsigned long long int)x; -} -__device__ static inline unsigned long long int __float2ull_rn(float x) { - return (unsigned long long int)x; -} -__device__ static inline unsigned long long int __float2ull_ru(float x) { - return (unsigned long long int)x; -} -__device__ static inline unsigned long long int __float2ull_rz(float x) { - return (unsigned long long int)x; -} - -__device__ static inline int __float_as_int(float x) { - static_assert(sizeof(int) == sizeof(float), ""); - - int tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ static inline unsigned int __float_as_uint(float x) { - static_assert(sizeof(unsigned int) == sizeof(float), ""); - - unsigned int tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ static inline double __hiloint2double(int hi, int lo) { - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - uint64_t tmp0 = (static_cast(hi) << 32ull) | static_cast(lo); - double tmp1; - __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - - return tmp1; -} - -__device__ static inline double __int2double_rn(int x) { return (double)x; } - -__device__ static inline float __int2float_rd(int x) { return (float)x; } -__device__ static inline float __int2float_rn(int x) { return (float)x; } -__device__ static inline float __int2float_ru(int x) { return (float)x; } -__device__ static inline float __int2float_rz(int x) { return (float)x; } - -__device__ static inline float __int_as_float(int x) { - static_assert(sizeof(float) == sizeof(int), ""); - - float tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ static inline double __ll2double_rd(long long int x) { return (double)x; } -__device__ static inline double __ll2double_rn(long long int x) { return (double)x; } -__device__ static inline double __ll2double_ru(long long int x) { return (double)x; } 
-__device__ static inline double __ll2double_rz(long long int x) { return (double)x; } - -__device__ static inline float __ll2float_rd(long long int x) { return (float)x; } -__device__ static inline float __ll2float_rn(long long int x) { return (float)x; } -__device__ static inline float __ll2float_ru(long long int x) { return (float)x; } -__device__ static inline float __ll2float_rz(long long int x) { return (float)x; } - -__device__ static inline double __longlong_as_double(long long int x) { - static_assert(sizeof(double) == sizeof(long long), ""); - - double tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ static inline double __uint2double_rn(int x) { return (double)x; } - -__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; } -__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; } -__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; } -__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; } - -__device__ static inline float __uint_as_float(unsigned int x) { - static_assert(sizeof(float) == sizeof(unsigned int), ""); - - float tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; } -__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; } -__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; } -__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; } - -__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; } -__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; } -__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; } -__device__ static inline float __ull2float_rz(unsigned long 
long int x) { return (float)x; } - -#if defined(__HCC__) -#define __HCC_OR_HIP_CLANG__ 1 -#elif defined(__clang__) && defined(__HIP__) -#define __HCC_OR_HIP_CLANG__ 1 -#else -#define __HCC_OR_HIP_CLANG__ 0 -#endif - -#ifdef __HCC_OR_HIP_CLANG__ - -// Clock functions -__device__ long long int __clock64(); -__device__ long long int __clock(); -__device__ long long int clock64(); -__device__ long long int clock(); -// hip.amdgcn.bc - named sync -__device__ void __named_sync(int a, int b); - -#ifdef __HIP_DEVICE_COMPILE__ - -// Clock functions -#if __HCC__ -extern "C" uint64_t __clock_u64() __HC__; -#endif - -__device__ -inline __attribute((always_inline)) -long long int __clock64() { -// ToDo: Unify HCC and HIP implementation. -#if __HCC__ - return (long long int) __clock_u64(); -#else - return (long long int) __builtin_amdgcn_s_memrealtime(); -#endif -} - -__device__ -inline __attribute((always_inline)) -long long int __clock() { return __clock64(); } - -__device__ -inline __attribute__((always_inline)) -long long int clock64() { return __clock64(); } - -__device__ -inline __attribute__((always_inline)) -long long int clock() { return __clock(); } - -// hip.amdgcn.bc - named sync -__device__ -inline -void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); } - -#endif // __HIP_DEVICE_COMPILE__ - -// warp vote function __all __any __ballot -__device__ -inline -int __all(int predicate) { - return __ockl_wfall_i32(predicate); -} - -__device__ -inline -int __any(int predicate) { - return __ockl_wfany_i32(predicate); -} - -// XXX from llvm/include/llvm/IR/InstrTypes.h -#define ICMP_NE 33 - -__device__ -inline -unsigned long long int __ballot(int predicate) { - return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); -} - -__device__ -inline -unsigned long long int __ballot64(int predicate) { - return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); -} - -// hip.amdgcn.bc - lanemask -__device__ -inline -int64_t __lanemask_gt() -{ - int32_t activelane = 
__ockl_activelane_u32(); - int64_t ballot = __ballot64(1); - if (activelane != 63) { - int64_t tmp = (~0UL) << (activelane + 1); - return tmp & ballot; - } - return 0; -} - -__device__ -inline -int64_t __lanemask_lt() -{ - int32_t activelane = __ockl_activelane_u32(); - int64_t ballot = __ballot64(1); - if (activelane == 0) - return 0; - return ballot; -} - -__device__ inline void* __local_to_generic(void* p) { return p; } - -#ifdef __HIP_DEVICE_COMPILE__ -__device__ -inline -void* __get_dynamicgroupbaseptr() -{ - // Get group segment base pointer. - return (char*)__local_to_generic((void*)__to_local(__llvm_amdgcn_groupstaticsize())); -} -#else -__device__ -void* __get_dynamicgroupbaseptr(); -#endif // __HIP_DEVICE_COMPILE__ - -__device__ -inline -void *__amdgcn_get_dynamicgroupbaseptr() { - return __get_dynamicgroupbaseptr(); -} - -#if defined(__HCC__) && (__hcc_minor__ < 3) -// hip.amdgcn.bc - sync threads -#define __CLK_LOCAL_MEM_FENCE 0x01 -typedef unsigned __cl_mem_fence_flags; - -typedef enum __memory_scope { - __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} __memory_scope; - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum __memory_order -{ - __memory_order_relaxed = __ATOMIC_RELAXED, - __memory_order_acquire = __ATOMIC_ACQUIRE, - __memory_order_release = __ATOMIC_RELEASE, - __memory_order_acq_rel = __ATOMIC_ACQ_REL, - __memory_order_seq_cst = __ATOMIC_SEQ_CST -} __memory_order; - -__device__ -inline -static void -__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope) -{ - // We're tying global-happens-before and local-happens-before together as does HSA - if (order != __memory_order_relaxed) { - switch (scope) { - case 
__memory_scope_work_item: - break; - case __memory_scope_sub_group: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_sg(); break; - case __memory_order_release: __llvm_fence_rel_sg(); break; - case __memory_order_acq_rel: __llvm_fence_ar_sg(); break; - case __memory_order_seq_cst: __llvm_fence_sc_sg(); break; - } - break; - case __memory_scope_work_group: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_wg(); break; - case __memory_order_release: __llvm_fence_rel_wg(); break; - case __memory_order_acq_rel: __llvm_fence_ar_wg(); break; - case __memory_order_seq_cst: __llvm_fence_sc_wg(); break; - } - break; - case __memory_scope_device: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_dev(); break; - case __memory_order_release: __llvm_fence_rel_dev(); break; - case __memory_order_acq_rel: __llvm_fence_ar_dev(); break; - case __memory_order_seq_cst: __llvm_fence_sc_dev(); break; - } - break; - case __memory_scope_all_svm_devices: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_sys(); break; - case __memory_order_release: __llvm_fence_rel_sys(); break; - case __memory_order_acq_rel: __llvm_fence_ar_sys(); break; - case __memory_order_seq_cst: __llvm_fence_sc_sys(); break; - } - break; - } - } -} -#endif - -// Memory Fence Functions -__device__ -inline -static void __threadfence() -{ - __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_device); -} - -__device__ -inline -static void __threadfence_block() -{ - __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_work_group); -} - -__device__ -inline -static void __threadfence_system() -{ - __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices); -} - -// abort -__device__ -inline -__attribute__((weak)) -void abort() { - return 
__builtin_trap(); -} - - -#endif // __HCC_OR_HIP_CLANG__ - -#ifdef __HCC__ - -/** - * extern __shared__ - */ - -// Macro to replace extern __shared__ declarations -// to local variable definitions -#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr(); - -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - - -#elif defined(__clang__) && defined(__HIP__) - -#pragma push_macro("__DEVICE__") -#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \ - __attribute__((weak)) - -__DEVICE__ -inline -void __assert_fail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function) -{ - // Ignore all the args for now. - __builtin_trap(); -} - -__DEVICE__ -inline -void __assertfail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function, - size_t charsize) -{ - // ignore all the args for now. - __builtin_trap(); -} - -__device__ -inline -static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) -{ - if (flags) { - __atomic_work_item_fence(flags, __memory_order_release, scope); - __builtin_amdgcn_s_barrier(); - __atomic_work_item_fence(flags, __memory_order_acquire, scope); - } else { - __builtin_amdgcn_s_barrier(); - } -} - -__device__ -inline -static void __barrier(int n) -{ - __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); -} - -__device__ -inline -__attribute__((noduplicate)) -void __syncthreads() -{ - __barrier(__CLK_LOCAL_MEM_FENCE); -} - -// hip.amdgcn.bc - device routine -/* - HW_ID Register bit structure - WAVE_ID 3:0 Wave buffer slot number. 0-9. - SIMD_ID 5:4 SIMD which the wave is assigned to within the CU. - PIPE_ID 7:6 Pipeline from which the wave was dispatched. - CU_ID 11:8 Compute Unit the wave is assigned to. - SH_ID 12 Shader Array (within an SE) the wave is assigned to. - SE_ID 14:13 Shader Engine the wave is assigned to. 
- TG_ID 19:16 Thread-group ID - VM_ID 23:20 Virtual Memory ID - QUEUE_ID 26:24 Queue from which this wave was dispatched. - STATE_ID 29:27 State ID (graphics only, not compute). - ME_ID 31:30 Micro-engine ID. - */ - -#define HW_ID 4 - -#define HW_ID_CU_ID_SIZE 4 -#define HW_ID_CU_ID_OFFSET 8 - -#define HW_ID_SE_ID_SIZE 2 -#define HW_ID_SE_ID_OFFSET 13 - -/* - Encoding of parameter bitmask - HW_ID 5:0 HW_ID - OFFSET 10:6 Range: 0..31 - SIZE 15:11 Range: 1..32 - */ - -#define GETREG_IMMED(SZ,OFF,REG) (SZ << 11) | (OFF << 6) | REG - -__device__ -inline -unsigned __smid(void) -{ - unsigned cu_id = __builtin_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID)); - unsigned se_id = __builtin_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID)); - - /* Each shader engine has 16 CU */ - return (se_id << HW_ID_CU_ID_SIZE) + cu_id; -} - -#pragma push_macro("__DEVICE__") - -// Macro to replace extern __shared__ declarations -// to local variable definitions -#define HIP_DYNAMIC_SHARED(type, var) \ - type* var = (type*)__amdgcn_get_dynamicgroupbaseptr(); - -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - - -#endif //defined(__clang__) && defined(__HIP__) - - -// loop unrolling -static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) { - auto dstPtr = static_cast(dst); - auto srcPtr = static_cast(src); - - while (size >= 4u) { - dstPtr[0] = srcPtr[0]; - dstPtr[1] = srcPtr[1]; - dstPtr[2] = srcPtr[2]; - dstPtr[3] = srcPtr[3]; - - size -= 4u; - srcPtr += 4u; - dstPtr += 4u; - } - switch (size) { - case 3: - dstPtr[2] = srcPtr[2]; - case 2: - dstPtr[1] = srcPtr[1]; - case 1: - dstPtr[0] = srcPtr[0]; - } - - return dst; -} - -static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) { - auto dstPtr = static_cast(dst); - - while (size >= 4u) { - dstPtr[0] = val; - dstPtr[1] = val; - dstPtr[2] = val; - dstPtr[3] = val; - - size -= 4u; - dstPtr += 4u; - } - switch (size) { - case 3: 
- dstPtr[2] = val; - case 2: - dstPtr[1] = val; - case 1: - dstPtr[0] = val; - } - - return dst; -} -static inline __device__ void* memcpy(void* dst, const void* src, size_t size) { - return __hip_hc_memcpy(dst, src, size); -} - -static inline __device__ void* memset(void* ptr, int val, size_t size) { - unsigned char val8 = static_cast(val); - return __hip_hc_memset(ptr, val8, size); -} - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h b/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h deleted file mode 100644 index 2bf3c8cc5..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/device_library_decls.h +++ /dev/null @@ -1,118 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/device_library_decls.h - * @brief Contains declarations for types and functions in device library. 
- */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H - -#include "hip/hcc_detail/host_defines.h" - -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int); -extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int); -extern "C" __device__ uint __ockl_activelane_u32(void); - -extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); -extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); -extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); -extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); -extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); - -extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); -extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); -extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint); -extern "C" __device__ __attribute__((const)) ulong __ockl_clz_u64(ulong); - -extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float); -extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float); -extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float); -extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float); - -extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); -extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); - -// Introduce local address space -#define __local __attribute__((address_space(3))) - -#ifdef __HIP_DEVICE_COMPILE__ -__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } -#endif //__HIP_DEVICE_COMPILE__ - -#if defined(__HCC__) && (__hcc_minor__ < 3) -// __llvm_fence* 
functions from device-libs/irif/src/fence.ll -extern "C" __device__ void __llvm_fence_acq_sg(void); -extern "C" __device__ void __llvm_fence_acq_wg(void); -extern "C" __device__ void __llvm_fence_acq_dev(void); -extern "C" __device__ void __llvm_fence_acq_sys(void); - -extern "C" __device__ void __llvm_fence_rel_sg(void); -extern "C" __device__ void __llvm_fence_rel_wg(void); -extern "C" __device__ void __llvm_fence_rel_dev(void); -extern "C" __device__ void __llvm_fence_rel_sys(void); - -extern "C" __device__ void __llvm_fence_ar_sg(void); -extern "C" __device__ void __llvm_fence_ar_wg(void); -extern "C" __device__ void __llvm_fence_ar_dev(void); -extern "C" __device__ void __llvm_fence_ar_sys(void); - - -extern "C" __device__ void __llvm_fence_sc_sg(void); -extern "C" __device__ void __llvm_fence_sc_wg(void); -extern "C" __device__ void __llvm_fence_sc_dev(void); -extern "C" __device__ void __llvm_fence_sc_sys(void); -#else -// Using hip.amdgcn.bc - sync threads -#define __CLK_LOCAL_MEM_FENCE 0x01 -typedef unsigned __cl_mem_fence_flags; - -typedef enum __memory_scope { - __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} __memory_scope; - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum __memory_order -{ - __memory_order_relaxed = __ATOMIC_RELAXED, - __memory_order_acquire = __ATOMIC_ACQUIRE, - __memory_order_release = __ATOMIC_RELEASE, - __memory_order_acq_rel = __ATOMIC_ACQ_REL, - __memory_order_seq_cst = __ATOMIC_SEQ_CST -} __memory_order; - -// Linked from hip.amdgcn.bc -extern "C" __device__ void -__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/driver_types.h 
b/src/utils/amd_hip/hip/hcc_detail/driver_types.h deleted file mode 100644 index 8e1fec11f..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/driver_types.h +++ /dev/null @@ -1,314 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H - -#ifndef __cplusplus -#include -#endif - -typedef void* hipDeviceptr_t; -typedef enum hipChannelFormatKind { - hipChannelFormatKindSigned = 0, - hipChannelFormatKindUnsigned = 1, - hipChannelFormatKindFloat = 2, - hipChannelFormatKindNone = 3 -}hipChannelFormatKind; - -typedef struct hipChannelFormatDesc { - int x; - int y; - int z; - int w; - enum hipChannelFormatKind f; -}hipChannelFormatDesc; - -#define HIP_TRSF_NORMALIZED_COORDINATES 0x01 -#define HIP_TRSF_READ_AS_INTEGER 0x00 -#define HIP_TRSA_OVERRIDE_FORMAT 0x01 - -typedef enum hipArray_Format { - HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, - HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, - HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, - HIP_AD_FORMAT_SIGNED_INT8 = 0x08, - HIP_AD_FORMAT_SIGNED_INT16 = 0x09, - HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, - HIP_AD_FORMAT_HALF = 0x10, - HIP_AD_FORMAT_FLOAT = 0x20 -}hipArray_Format; - -typedef struct HIP_ARRAY_DESCRIPTOR { - enum hipArray_Format format; - unsigned int numChannels; - size_t width; - size_t height; - unsigned int flags; - size_t depth; -}HIP_ARRAY_DESCRIPTOR; - -typedef struct hipArray { - void* data; // FIXME: generalize this - struct hipChannelFormatDesc desc; - unsigned int type; - unsigned int width; - unsigned int height; - unsigned int depth; - struct HIP_ARRAY_DESCRIPTOR drvDesc; - bool isDrv; - unsigned int textureType; -}hipArray; - -typedef struct hip_Memcpy2D { - size_t height; - size_t widthInBytes; - hipArray* dstArray; - hipDeviceptr_t dstDevice; - void* dstHost; - hipMemoryType dstMemoryType; - size_t dstPitch; - size_t dstXInBytes; - size_t dstY; - hipArray* srcArray; - hipDeviceptr_t srcDevice; - const void* srcHost; - hipMemoryType srcMemoryType; - size_t srcPitch; - size_t srcXInBytes; - size_t srcY; -} hip_Memcpy2D; - - -typedef struct hipArray* hipArray_t; - -typedef const struct hipArray* hipArray_const_t; - -// TODO: It needs to be modified since it was 
just copied from hipArray. -struct hipMipmappedArray { - void* data; // FIXME: generalize this - struct hipChannelFormatDesc desc; - unsigned int width; - unsigned int height; - unsigned int depth; -}; - -typedef struct hipMipmappedArray* hipMipmappedArray_t; - -typedef const struct hipMipmappedArray* hipMipmappedArray_const_t; - -/** - * hip resource types - */ -typedef enum hipResourceType { - hipResourceTypeArray = 0x00, - hipResourceTypeMipmappedArray = 0x01, - hipResourceTypeLinear = 0x02, - hipResourceTypePitch2D = 0x03 -}hipResourceType; - -/** - * hip texture resource view formats - */ -typedef enum hipResourceViewFormat { - hipResViewFormatNone = 0x00, - hipResViewFormatUnsignedChar1 = 0x01, - hipResViewFormatUnsignedChar2 = 0x02, - hipResViewFormatUnsignedChar4 = 0x03, - hipResViewFormatSignedChar1 = 0x04, - hipResViewFormatSignedChar2 = 0x05, - hipResViewFormatSignedChar4 = 0x06, - hipResViewFormatUnsignedShort1 = 0x07, - hipResViewFormatUnsignedShort2 = 0x08, - hipResViewFormatUnsignedShort4 = 0x09, - hipResViewFormatSignedShort1 = 0x0a, - hipResViewFormatSignedShort2 = 0x0b, - hipResViewFormatSignedShort4 = 0x0c, - hipResViewFormatUnsignedInt1 = 0x0d, - hipResViewFormatUnsignedInt2 = 0x0e, - hipResViewFormatUnsignedInt4 = 0x0f, - hipResViewFormatSignedInt1 = 0x10, - hipResViewFormatSignedInt2 = 0x11, - hipResViewFormatSignedInt4 = 0x12, - hipResViewFormatHalf1 = 0x13, - hipResViewFormatHalf2 = 0x14, - hipResViewFormatHalf4 = 0x15, - hipResViewFormatFloat1 = 0x16, - hipResViewFormatFloat2 = 0x17, - hipResViewFormatFloat4 = 0x18, - hipResViewFormatUnsignedBlockCompressed1 = 0x19, - hipResViewFormatUnsignedBlockCompressed2 = 0x1a, - hipResViewFormatUnsignedBlockCompressed3 = 0x1b, - hipResViewFormatUnsignedBlockCompressed4 = 0x1c, - hipResViewFormatSignedBlockCompressed4 = 0x1d, - hipResViewFormatUnsignedBlockCompressed5 = 0x1e, - hipResViewFormatSignedBlockCompressed5 = 0x1f, - hipResViewFormatUnsignedBlockCompressed6H = 0x20, - 
hipResViewFormatSignedBlockCompressed6H = 0x21, - hipResViewFormatUnsignedBlockCompressed7 = 0x22 -}hipResourceViewFormat; - -/** - * HIP resource descriptor - */ -typedef struct hipResourceDesc { - enum hipResourceType resType; - - union { - struct { - hipArray_t array; - } array; - struct { - hipMipmappedArray_t mipmap; - } mipmap; - struct { - void* devPtr; - struct hipChannelFormatDesc desc; - size_t sizeInBytes; - } linear; - struct { - void* devPtr; - struct hipChannelFormatDesc desc; - size_t width; - size_t height; - size_t pitchInBytes; - } pitch2D; - } res; -}hipResourceDesc; - -/** - * hip resource view descriptor - */ -struct hipResourceViewDesc { - enum hipResourceViewFormat format; - size_t width; - size_t height; - size_t depth; - unsigned int firstMipmapLevel; - unsigned int lastMipmapLevel; - unsigned int firstLayer; - unsigned int lastLayer; -}; - -/** - * Memory copy types - * - */ -typedef enum hipMemcpyKind { - hipMemcpyHostToHost = 0, ///< Host-to-Host Copy - hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy - hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy - hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy - hipMemcpyDefault = - 4 ///< Runtime will automatically determine copy-kind based on virtual addresses. 
-} hipMemcpyKind; - -typedef struct hipPitchedPtr { - void* ptr; - size_t pitch; - size_t xsize; - size_t ysize; -}hipPitchedPtr; - -typedef struct hipExtent { - size_t width; // Width in elements when referring to array memory, in bytes when referring to - // linear memory - size_t height; - size_t depth; -}hipExtent; - -typedef struct hipPos { - size_t x; - size_t y; - size_t z; -}hipPos; - -typedef struct hipMemcpy3DParms { - hipArray_t srcArray; - struct hipPos srcPos; - struct hipPitchedPtr srcPtr; - - hipArray_t dstArray; - struct hipPos dstPos; - struct hipPitchedPtr dstPtr; - - struct hipExtent extent; - enum hipMemcpyKind kind; - - size_t Depth; - size_t Height; - size_t WidthInBytes; - hipDeviceptr_t dstDevice; - size_t dstHeight; - void* dstHost; - size_t dstLOD; - hipMemoryType dstMemoryType; - size_t dstPitch; - size_t dstXInBytes; - size_t dstY; - size_t dstZ; - void* reserved0; - void* reserved1; - hipDeviceptr_t srcDevice; - size_t srcHeight; - const void* srcHost; - size_t srcLOD; - hipMemoryType srcMemoryType; - size_t srcPitch; - size_t srcXInBytes; - size_t srcY; - size_t srcZ; -}hipMemcpy3DParms; - -static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, - size_t ysz) { - struct hipPitchedPtr s; - - s.ptr = d; - s.pitch = p; - s.xsize = xsz; - s.ysize = ysz; - - return s; -} - -static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) { - struct hipPos p; - - p.x = x; - p.y = y; - p.z = z; - - return p; -} - -static inline struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) { - struct hipExtent e; - - e.width = w; - e.height = h; - e.depth = d; - - return e; -} - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp deleted file mode 100644 index 2fbda4862..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/functional_grid_launch.hpp +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced 
Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "code_object_bundle.hpp" -#include "concepts.hpp" -#include "helpers.hpp" -#include "program_state.hpp" - -#include "hc.hpp" -#include "hip/hip_hcc.h" -#include "hip_runtime.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace hip_impl { -template {}>::type* = nullptr> -inline T round_up_to_next_multiple_nonnegative(T x, T y) { - T tmp = x + y - 1; - return tmp - tmp % y; -} - -template < - std::size_t n, - typename... Ts, - typename std::enable_if::type* = nullptr> -inline std::vector make_kernarg( - const std::tuple&, - const std::vector>&, - std::vector kernarg) { - return kernarg; -} - -template < - std::size_t n, - typename... 
Ts, - typename std::enable_if::type* = nullptr> -inline std::vector make_kernarg( - const std::tuple& formals, - const std::vector>& size_align, - std::vector kernarg) { - using T = typename std::tuple_element>::type; - - static_assert( - !std::is_reference{}, - "A __global__ function cannot have a reference as one of its " - "arguments."); - #if defined(HIP_STRICT) - static_assert( - std::is_trivially_copyable{}, - "Only TriviallyCopyable types can be arguments to a __global__ " - "function"); - #endif - - kernarg.resize(round_up_to_next_multiple_nonnegative( - kernarg.size(), size_align[n].second) + size_align[n].first); - - std::memcpy( - kernarg.data() + kernarg.size() - size_align[n].first, - &std::get(formals), - size_align[n].first); - - return make_kernarg(formals, size_align, std::move(kernarg)); -} - -template -inline std::vector make_kernarg( - void (*kernel)(Formals...), std::tuple actuals) { - static_assert(sizeof...(Formals) == sizeof...(Actuals), - "The count of formal arguments must match the count of actuals."); - - if (sizeof...(Formals) == 0) return {}; - - auto it = function_names().find(reinterpret_cast(kernel)); - if (it == function_names().cend()) { - it = - function_names(true).find(reinterpret_cast(kernel)); - if (it == function_names().cend()) { - throw std::runtime_error{"Undefined __global__ function."}; - } - } - - auto it1 = kernargs().find(it->second); - if (it1 == kernargs().end()) { - it1 = kernargs(true).find(it->second); - - if (it1 == kernargs().end()) { - throw std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}; - } - } - - std::tuple to_formals{std::move(actuals)}; - std::vector kernarg; - kernarg.reserve(sizeof(to_formals)); - - return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); -} - -void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, - const dim3& dimBlocks, std::uint32_t sharedMemBytes, hipStream_t stream, - void** kernarg); -} // Namespace 
hip_impl. - -template -inline void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { - auto kernarg = hip_impl::make_kernarg( - kernel, std::tuple{std::move(args)...}); - std::size_t kernarg_size = kernarg.size(); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, kernarg.data(), HIP_LAUNCH_PARAM_BUFFER_SIZE, - &kernarg_size, HIP_LAUNCH_PARAM_END}; - - hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast(kernel), numBlocks, dimBlocks, - sharedMemBytes, stream, &config[0]); -} - -template -[[deprecated("hipLaunchKernel is deprecated and will be removed in the next " - "version of HIP; please upgrade to hipLaunchKernelGGL.")]] -inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t groupMemBytes, hipStream_t stream, Args... args) { - hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, - std::move(args)...); -} \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch.h b/src/utils/amd_hip/hip/hcc_detail/grid_launch.h deleted file mode 100644 index 61fd9bdbe..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/grid_launch.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#include - -#include - -#define GRID_LAUNCH_VERSION 20 - -// Extern definitions -namespace hc{ -class completion_future; -class accelerator_view; -} - - -// 3 dim structure for groups and grids. -typedef struct gl_dim3 -{ - int x,y,z; - gl_dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {}; -} gl_dim3; - -typedef enum gl_barrier_bit { - barrier_bit_queue_default, - barrier_bit_none, - barrier_bit_wait, -} gl_barrier_bit; - - -// grid_launch_parm contains information used to launch the kernel. -typedef struct grid_launch_parm -{ - //! Grid dimensions - gl_dim3 grid_dim; - - //! Group dimensions - gl_dim3 group_dim; - - //! 
Amount of dynamic group memory to use with the kernel launch. - //! This memory is in addition to the amount used statically in the kernel. - unsigned int dynamic_group_mem_bytes; - - //! Control setting of barrier bit on per-packet basis: - //! See gl_barrier_bit description. - //! Placeholder, is not used to control packet dispatch yet - enum gl_barrier_bit barrier_bit; - - //! Value of packet fences to apply to launch. - //! The correspond to the value of bits 9:14 in the AQL packet, - //! see HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE and hsa_fence_scope_t. - //! Set to -1 for conservative defaults. - //! Placeholder, is not used to control packet dispatch yet - unsigned int launch_fence; - - //! Pointer to the accelerator_view where the kernel should execute. - //! If NULL, the default view on the default accelerator is used. - hc::accelerator_view *av; - - //! Pointer to the completion_future used to track the status of the command. - //! If NULL, the command does not write status. In this case, - //! synchronization can be enforced with queue-level waits or - //! waiting on younger commands. 
- hc::completion_future *cf; - - grid_launch_parm() = default; -} grid_launch_parm; - - -extern void init_grid_launch(grid_launch_parm *gl); diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp deleted file mode 100644 index 04ce7e036..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/grid_launch.hpp +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include "grid_launch.h" -#include "hc.hpp" - -class grid_launch_parm_cxx : public grid_launch_parm -{ -public: - grid_launch_parm_cxx() = default; - - // customized serialization: don't need av and cf in kernel - __attribute__((annotate("serialize"))) - void __cxxamp_serialize(Kalmar::Serialize& s) const { - s.Append(sizeof(int), &grid_dim.x); - s.Append(sizeof(int), &grid_dim.y); - s.Append(sizeof(int), &grid_dim.z); - s.Append(sizeof(int), &group_dim.x); - s.Append(sizeof(int), &group_dim.y); - s.Append(sizeof(int), &group_dim.z); - } - - __attribute__((annotate("user_deserialize"))) - grid_launch_parm_cxx(int grid_dim_x, int grid_dim_y, int grid_dim_z, - int group_dim_x, int group_dim_y, int group_dim_z) { - grid_dim.x = grid_dim_x; - grid_dim.y = grid_dim_y; - grid_dim.z = grid_dim_z; - group_dim.x = group_dim_x; - group_dim.y = group_dim_y; - group_dim.z = group_dim_z; - } -}; - - -extern inline void grid_launch_init(grid_launch_parm *lp) { - lp->grid_dim.x = lp->grid_dim.y = lp->grid_dim.z = 1; - - lp->group_dim.x = lp->group_dim.y = lp->group_dim.z = 1; - - lp->dynamic_group_mem_bytes = 0; - - lp->barrier_bit = barrier_bit_queue_default; - lp->launch_fence = -1; - - // TODO - set to NULL? 
- static hc::accelerator_view av = hc::accelerator().get_default_view(); - lp->av = &av; - lp->cf = NULL; -} - diff --git a/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp b/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp deleted file mode 100644 index 1c05279e0..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/grid_launch_GGL.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#pragma once - -#if GENERIC_GRID_LAUNCH == 1 -#if __hcc_workweek__ >= 17481 -#include "functional_grid_launch.hpp" -#else -#include "macro_based_grid_launch.hpp" -#endif -#endif // GENERIC_GRID_LAUNCH \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/helpers.hpp b/src/utils/amd_hip/hip/hcc_detail/helpers.hpp deleted file mode 100644 index 1916945c1..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/helpers.hpp +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once -#include "concepts.hpp" - -#include // For std::conditional, std::decay, std::enable_if, - // std::false_type, std result_of and std::true_type. -#include // For std::declval. - -namespace std { // TODO: these should be removed as soon as possible. 
-#if (__cplusplus < 201406L) -#if (__cplusplus < 201402L) -template -using enable_if_t = typename enable_if::type; -template -using conditional_t = typename conditional::type; -template -using decay_t = typename decay::type; -template -using result_of_t = typename result_of::type; -template -using remove_reference_t = typename remove_reference::type; -#endif -#endif -} // namespace std - -namespace hip_impl { -template -using void_t_ = void; - -#if (__cplusplus < 201402L) -template -struct is_callable_impl : is_callable_impl {}; - -// Pointer to member function, call through non-pointer. -template -struct is_callable_impl< - F(C, Ts...), 0u, - void_t_().*std::declval())(std::declval()...))> > - : std::true_type {}; - -// Pointer to member function, call through pointer. -template -struct is_callable_impl< - F(C, Ts...), 1u, - void_t_()).*std::declval())(std::declval()...))> > - : std::true_type {}; - -// Pointer to member data, call through non-pointer, no args. -template -struct is_callable_impl().*std::declval())> > - : std::true_type {}; - -// Pointer to member data, call through pointer, no args. -template -struct is_callable_impl().*std::declval())> > - : std::true_type {}; - -// General call, n args. -template -struct is_callable_impl()(std::declval()...))> > - : std::true_type {}; - -// Not callable. -template -struct is_callable_impl : std::false_type {}; -#else -template -struct is_callable_impl : std::false_type {}; - -template -struct is_callable_impl > > : std::true_type {}; -#endif -template -struct is_callable : is_callable_impl {}; - -#define count_macro_args_impl_hip_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ - _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, \ - _26, _27, _28, _29, _30, _31, _n, ...) \ - _n -#define count_macro_args_hip_(...) 
\ - count_macro_args_impl_hip_(, ##__VA_ARGS__, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \ - 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, \ - 0) - -#define overloaded_macro_expand_hip_(macro, arg_cnt) macro##arg_cnt -#define overload_macro_impl_hip_(macro, arg_cnt) overloaded_macro_expand_hip_(macro, arg_cnt) -#define overload_macro_hip_(macro, ...) \ - overload_macro_impl_hip_(macro, count_macro_args_hip_(__VA_ARGS__))(__VA_ARGS__) -} // namespace hip_impl diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h b/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h deleted file mode 100644 index a5ac94a74..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_atomic.h +++ /dev/null @@ -1,269 +0,0 @@ -#pragma once - -#include "device_functions.h" - -__device__ -inline -int atomicCAS(int* address, int compare, int val) -{ - __atomic_compare_exchange_n( - address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - - return compare; -} -__device__ -inline -unsigned int atomicCAS( - unsigned int* address, unsigned int compare, unsigned int val) -{ - __atomic_compare_exchange_n( - address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - - return compare; -} -__device__ -inline -unsigned long long atomicCAS( - unsigned long long* address, - unsigned long long compare, - unsigned long long val) -{ - __atomic_compare_exchange_n( - address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - - return compare; -} - -__device__ -inline -int atomicAdd(int* address, int val) -{ - return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicAdd(unsigned int* address, unsigned int val) -{ - return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned long long atomicAdd( - unsigned long long* address, unsigned long long val) -{ - return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -float atomicAdd(float* address, float val) 
-{ - unsigned int* uaddr{reinterpret_cast(address)}; - unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned int r; - - do { - r = old; - old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r))); - } while (r != old); - - return __uint_as_float(r); -} -__device__ -inline -double atomicAdd(double* address, double val) -{ - unsigned long long* uaddr{reinterpret_cast(address)}; - unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned long long r; - - do { - r = old; - old = atomicCAS( - uaddr, r, __double_as_longlong(val + __longlong_as_double(r))); - } while (r != old); - - return __longlong_as_double(r); -} - -__device__ -inline -int atomicSub(int* address, int val) -{ - return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicSub(unsigned int* address, unsigned int val) -{ - return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); -} - -__device__ -inline -int atomicExch(int* address, int val) -{ - return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicExch(unsigned int* address, unsigned int val) -{ - return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned long long atomicExch(unsigned long long* address, unsigned long long val) -{ - return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -float atomicExch(float* address, float val) -{ - return __uint_as_float(__atomic_exchange_n( - reinterpret_cast(address), - __float_as_uint(val), - __ATOMIC_RELAXED)); -} - -__device__ -inline -int atomicMin(int* address, int val) -{ - return __sync_fetch_and_min(address, val); -} -__device__ -inline -unsigned int atomicMin(unsigned int* address, unsigned int val) -{ - return __sync_fetch_and_umin(address, val); -} -__device__ -inline -unsigned long long atomicMin( - unsigned long long* address, unsigned long long val) -{ - unsigned long long tmp{__atomic_load_n(address, 
__ATOMIC_RELAXED)}; - while (val < tmp) { tmp = atomicCAS(address, tmp, val); } - - return tmp; -} - -__device__ -inline -int atomicMax(int* address, int val) -{ - return __sync_fetch_and_max(address, val); -} -__device__ -inline -unsigned int atomicMax(unsigned int* address, unsigned int val) -{ - return __sync_fetch_and_umax(address, val); -} -__device__ -inline -unsigned long long atomicMax( - unsigned long long* address, unsigned long long val) -{ - unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; - while (tmp < val) { tmp = atomicCAS(address, tmp, val); } - - return tmp; -} - -__device__ -inline -unsigned int atomicInc(unsigned int* address, unsigned int val) -{ - __device__ - extern - unsigned int __builtin_amdgcn_atomic_inc( - unsigned int*, - unsigned int, - unsigned int, - unsigned int, - bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32"); - - return __builtin_amdgcn_atomic_inc( - address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); -} - -__device__ -inline -unsigned int atomicDec(unsigned int* address, unsigned int val) -{ - __device__ - extern - unsigned int __builtin_amdgcn_atomic_dec( - unsigned int*, - unsigned int, - unsigned int, - unsigned int, - bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32"); - - return __builtin_amdgcn_atomic_dec( - address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); -} - -__device__ -inline -int atomicAnd(int* address, int val) -{ - return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicAnd(unsigned int* address, unsigned int val) -{ - return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned long long atomicAnd( - unsigned long long* address, unsigned long long val) -{ - return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); -} - -__device__ -inline -int atomicOr(int* address, int val) -{ - return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicOr(unsigned int* 
address, unsigned int val) -{ - return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned long long atomicOr( - unsigned long long* address, unsigned long long val) -{ - return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); -} - -__device__ -inline -int atomicXor(int* address, int val) -{ - return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned int atomicXor(unsigned int* address, unsigned int val) -{ - return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); -} -__device__ -inline -unsigned long long atomicXor( - unsigned long long* address, unsigned long long val) -{ - return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); -} - -// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block. diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_complex.h b/src/utils/amd_hip/hip/hcc_detail/hip_complex.h deleted file mode 100644 index d19abd724..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_complex.h +++ /dev/null @@ -1,356 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H - -#include "hip/hcc_detail/hip_vector_types.h" - -// TODO: Clang has a bug which allows device functions to call std functions -// when std functions are introduced into default namespace by using statement. -// math.h may be included after this bug is fixed. -#if __cplusplus -#include -#else -#include "math.h" -#endif - -#if __cplusplus -#define COMPLEX_NEG_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator-(const type& op) { \ - type ret; \ - ret.x = -op.x; \ - ret.y = -op.y; \ - return ret; \ - } - -#define COMPLEX_EQ_OP_OVERLOAD(type) \ - __device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \ - return lhs.x == rhs.x && lhs.y == rhs.y; \ - } - -#define COMPLEX_NE_OP_OVERLOAD(type) \ - __device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \ - return !(lhs == rhs); \ - } - -#define COMPLEX_ADD_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x + rhs.x; \ - ret.y = lhs.y + rhs.y; \ - return ret; \ - } - -#define COMPLEX_SUB_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x - rhs.x; \ - ret.y = lhs.y - rhs.y; \ - return ret; \ - } - -#define COMPLEX_MUL_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ - ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ - return ret; \ - } - -#define COMPLEX_DIV_OP_OVERLOAD(type) \ - __device__ __host__ 
static inline type operator/(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ - ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ - ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y); \ - ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y); \ - return ret; \ - } - -#define COMPLEX_ADD_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \ - lhs.x += rhs.x; \ - lhs.y += rhs.y; \ - return lhs; \ - } - -#define COMPLEX_SUB_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \ - lhs.x -= rhs.x; \ - lhs.y -= rhs.y; \ - return lhs; \ - } - -#define COMPLEX_MUL_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \ - lhs = lhs * rhs; \ - return lhs; \ - } - -#define COMPLEX_DIV_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \ - lhs = lhs / rhs; \ - return lhs; \ - } - -#define COMPLEX_SCALAR_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - ret.y = lhs.y * rhs; \ - return ret; \ - } -#define MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ComplexT, T) \ - explicit __device__ __host__ ComplexT(T val) : x(val), y(val) {} \ - __device__ __host__ ComplexT(T val1, T val2) : x(val1), y(val2) {} - -#endif - -struct hipFloatComplex { -#ifdef __cplusplus - public: - typedef float value_type; - __device__ __host__ hipFloatComplex() : x(0.0f), y(0.0f) {} - explicit __device__ __host__ hipFloatComplex(float x) : x(x), y(0.0f) {} - __device__ __host__ hipFloatComplex(float x, float y) : x(x), y(y) {} - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned int) - 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long long) -#endif - float x, y; -} __attribute__((aligned(8))); - -struct hipDoubleComplex { -#ifdef __cplusplus - public: - typedef double value_type; - __device__ __host__ hipDoubleComplex() : x(0.0f), y(0.0f) {} - explicit __device__ __host__ hipDoubleComplex(double x) : x(x), y(0.0f) {} - __device__ __host__ hipDoubleComplex(double x, double y) : x(x), y(y) {} - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long long) -#endif - double x, y; -} __attribute__((aligned(16))); - -#if __cplusplus - -COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex) -COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex) -COMPLEX_NE_OP_OVERLOAD(hipFloatComplex) -COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex) -COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex) -COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex) -COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex) -COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex) 
-COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long) - -COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) - -#endif - -__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } - -__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } - -__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { - hipFloatComplex z; - z.x = a; - z.y = b; - return z; -} - -__device__ __host__ 
static inline hipFloatComplex hipConjf(hipFloatComplex z) { - hipFloatComplex ret; - ret.x = z.x; - ret.y = -z.y; - return ret; -} - -__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { - return z.x * z.x + z.y * z.y; -} - -__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x + q.x, p.y + q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x - q.x, p.y - q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { - float sqabs = hipCsqabsf(q); - hipFloatComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; -} - -__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } - -__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } - -__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } - -__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { - hipDoubleComplex z; - z.x = a; - z.y = b; - return z; -} - -__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { - hipDoubleComplex ret; - ret.x = z.x; - ret.y = z.y; - return ret; -} - -__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { - return z.x * z.x + z.y * z.y; -} - -__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { - return make_hipDoubleComplex(p.x + q.x, p.y + q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { - return 
make_hipDoubleComplex(p.x - q.x, p.y - q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { - return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { - double sqabs = hipCsqabs(q); - hipDoubleComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; -} - -__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrtf(hipCsqabs(z)); } - -typedef hipFloatComplex hipComplex; - -__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) { - return make_hipFloatComplex(x, y); -} - -__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { - return make_hipFloatComplex((float)z.x, (float)z.y); -} - -__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { - return make_hipDoubleComplex((double)z.x, (double)z.y); -} - -__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { - float real = (p.x * q.x) + r.x; - float imag = (q.x * p.y) + r.y; - - real = -(p.y * q.y) + real; - imag = (p.x * q.y) + imag; - - return make_hipComplex(real, imag); -} - -__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, - hipDoubleComplex r) { - float real = (p.x * q.x) + r.x; - float imag = (q.x * p.y) + r.y; - - real = -(p.y * q.y) + real; - imag = (p.x * q.y) + imag; - - return make_hipDoubleComplex(real, imag); -} - -// Complex functions returning real numbers. 
-#define __DEFINE_HIP_COMPLEX_REAL_FUN(func, hipFun) \ -__device__ __host__ inline float func(const hipFloatComplex& z) { return hipFun##f(z); } \ -__device__ __host__ inline double func(const hipDoubleComplex& z) { return hipFun(z); } - -__DEFINE_HIP_COMPLEX_REAL_FUN(abs, hipCabs) -__DEFINE_HIP_COMPLEX_REAL_FUN(real, hipCreal) -__DEFINE_HIP_COMPLEX_REAL_FUN(imag, hipCimag) - -// Complex functions returning complex numbers. -#define __DEFINE_HIP_COMPLEX_FUN(func, hipFun) \ -__device__ __host__ inline hipFloatComplex func(const hipFloatComplex& z) { return hipFun##f(z); } \ -__device__ __host__ inline hipDoubleComplex func(const hipDoubleComplex& z) { return hipFun(z); } - -__DEFINE_HIP_COMPLEX_FUN(conj, hipConj) - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_db.h b/src/utils/amd_hip/hip/hcc_detail/hip_db.h deleted file mode 100644 index 91ff54d3a..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_db.h +++ /dev/null @@ -1,21 +0,0 @@ -/** - * @defgroup HipDb HCC-specific debug facilities - * @{ - */ - - -/** - * @brief * Print memory tracker information for this pointer. - * - * HIP maintains a table for all memory allocations performed by the application. - * If targetAddress is 0, the entire table is printed to stderr. - * If targetAddress is non-null, this routine will perform some forensic analysis - * to find the pointer - */ -void hipdbPrintMem(void* targetAddress); - - -// doxygen end HipDb -/** - * @} - */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h deleted file mode 100644 index 849b7278d..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_fp16.h +++ /dev/null @@ -1,1645 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once -#include "hip/hcc_detail/host_defines.h" -#include -#if defined(__cplusplus) - #include - #include - #include -#endif - -#if defined(__clang__) && (__clang_major__ > 5) - typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); - - struct __half_raw { - union { - static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); - - _Float16 data; - unsigned short x; - }; - }; - - struct __half2_raw { - union { - static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); - - _Float16_2 data; - struct { - unsigned short x; - unsigned short y; - }; - }; - }; - - #if defined(__cplusplus) - #include "hip_fp16_math_fwd.h" - #include "hip_vector_types.h" - #include "host_defines.h" - - namespace std - { - template<> struct is_floating_point<_Float16> : std::true_type {}; - } - - template - using Enable_if_t = typename std::enable_if::type; - - // BEGIN STRUCT __HALF - struct __half { - protected: - union { - static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); - - _Float16 data; - unsigned short __x; - }; - public: - // CREATORS - __host__ __device__ - __half() = default; - __host__ __device__ - __half(const __half_raw& x) : data{x.data} {} - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - __host__ __device__ - __half(decltype(data) x) : data{x} {} - template< - typename T, - Enable_if_t{}>* = nullptr> - __host__ __device__ - __half(T x) : data{static_cast<_Float16>(x)} {} - #endif - __host__ __device__ - __half(const __half&) = default; - __host__ __device__ - __half(__half&&) = default; - __host__ __device__ - ~__half() = default; - - // CREATORS - DEVICE ONLY - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - template< - typename T, Enable_if_t{}>* = nullptr> - __device__ - __half(T x) : data{static_cast<_Float16>(x)} {} - #endif - - // MANIPULATORS - __host__ __device__ - __half& operator=(const __half&) = default; - __host__ __device__ - __half& operator=(__half&&) = default; - __host__ __device__ - __half& operator=(const 
__half_raw& x) - { - data = x.data; - return *this; - } - __host__ __device__ - volatile __half& operator=(const __half_raw& x) volatile - { - data = x.data; - return *this; - } - volatile __half& operator=(const volatile __half_raw& x) volatile - { - data = x.data; - return *this; - } - __half& operator=(__half_raw&& x) - { - data = x.data; - return *this; - } - volatile __half& operator=(__half_raw&& x) volatile - { - data = x.data; - return *this; - } - volatile __half& operator=(volatile __half_raw&& x) volatile - { - data = x.data; - return *this; - } - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - template< - typename T, - Enable_if_t{}>* = nullptr> - __host__ __device__ - __half& operator=(T x) - { - data = static_cast<_Float16>(x); - return *this; - } - #endif - - // MANIPULATORS - DEVICE ONLY - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - template< - typename T, Enable_if_t{}>* = nullptr> - __device__ - __half& operator=(T x) - { - data = static_cast<_Float16>(x); - return *this; - } - #endif - - #if !defined(__HIP_NO_HALF_OPERATORS__) - __device__ - __half& operator+=(const __half& x) - { - data += x.data; - return *this; - } - __device__ - __half& operator-=(const __half& x) - { - data -= x.data; - return *this; - } - __device__ - __half& operator*=(const __half& x) - { - data *= x.data; - return *this; - } - __device__ - __half& operator/=(const __half& x) - { - data /= x.data; - return *this; - } - __device__ - __half& operator++() { ++data; return *this; } - __device__ - __half operator++(int) - { - __half tmp{*this}; - ++*this; - return tmp; - } - __device__ - __half& operator--() { --data; return *this; } - __device__ - __half operator--(int) - { - __half tmp{*this}; - --*this; - return tmp; - } - #endif - - // ACCESSORS - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - template< - typename T, - Enable_if_t< - std::is_floating_point{} && - !std::is_same{}>* = nullptr> - operator T() const { return data; } - #endif - __host__ __device__ - operator 
__half_raw() const { return __half_raw{data}; } - __host__ __device__ - operator volatile __half_raw() const volatile - { - return __half_raw{data}; - } - - // ACCESSORS - DEVICE ONLY - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - template< - typename T, Enable_if_t{}>* = nullptr> - __device__ - operator T() const { return data; } - #endif - - #if !defined(__HIP_NO_HALF_OPERATORS__) - __device__ - __half operator+() const { return *this; } - __device__ - __half operator-() const - { - __half tmp{*this}; - tmp.data = -tmp.data; - return tmp; - } - #endif - - // FRIENDS - #if !defined(__HIP_NO_HALF_OPERATORS__) - friend - inline - __device__ - __half operator+(const __half& x, const __half& y) - { - return __half{x} += y; - } - friend - inline - __device__ - __half operator-(const __half& x, const __half& y) - { - return __half{x} -= y; - } - friend - inline - __device__ - __half operator*(const __half& x, const __half& y) - { - return __half{x} *= y; - } - friend - inline - __device__ - __half operator/(const __half& x, const __half& y) - { - return __half{x} /= y; - } - friend - inline - __device__ - bool operator==(const __half& x, const __half& y) - { - return x.data == y.data; - } - friend - inline - __device__ - bool operator!=(const __half& x, const __half& y) - { - return !(x == y); - } - friend - inline - __device__ - bool operator<(const __half& x, const __half& y) - { - return x.data < y.data; - } - friend - inline - __device__ - bool operator>(const __half& x, const __half& y) - { - return y.data < x.data; - } - friend - inline - __device__ - bool operator<=(const __half& x, const __half& y) - { - return !(y < x); - } - friend - inline - __device__ - bool operator>=(const __half& x, const __half& y) - { - return !(x < y); - } - #endif // !defined(__HIP_NO_HALF_OPERATORS__) - }; - // END STRUCT __HALF - - // BEGIN STRUCT __HALF2 - struct __half2 { - protected: - union { - static_assert( - sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); - - _Float16_2 
data; - struct { - unsigned short x; - unsigned short y; - }; - }; - public: - // CREATORS - __host__ __device__ - __half2() = default; - __host__ __device__ - __half2(const __half2_raw& x) : data{x.data} {} - __host__ __device__ - __half2(decltype(data) x) : data{x} {} - __host__ __device__ - __half2(const __half& x, const __half& y) - : - data{ - static_cast<__half_raw>(x).data, - static_cast<__half_raw>(y).data} - {} - __host__ __device__ - __half2(const __half2&) = default; - __host__ __device__ - __half2(__half2&&) = default; - __host__ __device__ - ~__half2() = default; - - // MANIPULATORS - __host__ __device__ - __half2& operator=(const __half2&) = default; - __host__ __device__ - __half2& operator=(__half2&&) = default; - __host__ __device__ - __half2& operator=(const __half2_raw& x) - { - data = x.data; - return *this; - } - - // MANIPULATORS - DEVICE ONLY - #if !defined(__HIP_NO_HALF_OPERATORS__) - __device__ - __half2& operator+=(const __half2& x) - { - data += x.data; - return *this; - } - __device__ - __half2& operator-=(const __half2& x) - { - data -= x.data; - return *this; - } - __device__ - __half2& operator*=(const __half2& x) - { - data *= x.data; - return *this; - } - __device__ - __half2& operator/=(const __half2& x) - { - data /= x.data; - return *this; - } - __device__ - __half2& operator++() { return *this += _Float16_2{1, 1}; } - __device__ - __half2 operator++(int) - { - __half2 tmp{*this}; - ++*this; - return tmp; - } - __device__ - __half2& operator--() { return *this -= _Float16_2{1, 1}; } - __device__ - __half2 operator--(int) - { - __half2 tmp{*this}; - --*this; - return tmp; - } - #endif - - // ACCESSORS - __host__ __device__ - operator decltype(data)() const { return data; } - __host__ __device__ - operator __half2_raw() const { return __half2_raw{data}; } - - // ACCESSORS - DEVICE ONLY - #if !defined(__HIP_NO_HALF_OPERATORS__) - __device__ - __half2 operator+() const { return *this; } - __device__ - __half2 operator-() const - { - 
__half2 tmp{*this}; - tmp.data = -tmp.data; - return tmp; - } - #endif - - // FRIENDS - #if !defined(__HIP_NO_HALF_OPERATORS__) - friend - inline - __device__ - __half2 operator+(const __half2& x, const __half2& y) - { - return __half2{x} += y; - } - friend - inline - __device__ - __half2 operator-(const __half2& x, const __half2& y) - { - return __half2{x} -= y; - } - friend - inline - __device__ - __half2 operator*(const __half2& x, const __half2& y) - { - return __half2{x} *= y; - } - friend - inline - __device__ - __half2 operator/(const __half2& x, const __half2& y) - { - return __half2{x} /= y; - } - friend - inline - __device__ - bool operator==(const __half2& x, const __half2& y) - { - auto r = x.data == y.data; - return r.x != 0 && r.y != 0; - } - friend - inline - __device__ - bool operator!=(const __half2& x, const __half2& y) - { - return !(x == y); - } - friend - inline - __device__ - bool operator<(const __half2& x, const __half2& y) - { - auto r = x.data < y.data; - return r.x != 0 && r.y != 0; - } - friend - inline - __device__ - bool operator>(const __half2& x, const __half2& y) - { - return y < x; - } - friend - inline - __device__ - bool operator<=(const __half2& x, const __half2& y) - { - return !(y < x); - } - friend - inline - __device__ - bool operator>=(const __half2& x, const __half2& y) - { - return !(x < y); - } - #endif // !defined(__HIP_NO_HALF_OPERATORS__) - }; - // END STRUCT __HALF2 - - namespace - { - inline - __host__ __device__ - __half2 make_half2(__half x, __half y) - { - return __half2{x, y}; - } - - inline - __device__ - __half __low2half(__half2 x) - { - return __half{__half_raw{static_cast<__half2_raw>(x).data.x}}; - } - - inline - __device__ - __half __high2half(__half2 x) - { - return __half{__half_raw{static_cast<__half2_raw>(x).data.y}}; - } - - inline - __device__ - __half2 __half2half2(__half x) - { - return __half2{x, x}; - } - - inline - __device__ - __half2 __halves2half2(__half x, __half y) - { - return __half2{x, 
y}; - } - - inline - __device__ - __half2 __low2half2(__half2 x) - { - return __half2{ - _Float16_2{ - static_cast<__half2_raw>(x).data.x, - static_cast<__half2_raw>(x).data.x}}; - } - - inline - __device__ - __half2 __high2half2(__half2 x) - { - return __half2_raw{ - _Float16_2{ - static_cast<__half2_raw>(x).data.y, - static_cast<__half2_raw>(x).data.y}}; - } - - inline - __device__ - __half2 __lows2half2(__half2 x, __half2 y) - { - return __half2_raw{ - _Float16_2{ - static_cast<__half2_raw>(x).data.x, - static_cast<__half2_raw>(y).data.x}}; - } - - inline - __device__ - __half2 __highs2half2(__half2 x, __half2 y) - { - return __half2_raw{ - _Float16_2{ - static_cast<__half2_raw>(x).data.y, - static_cast<__half2_raw>(y).data.y}}; - } - - inline - __device__ - __half2 __lowhigh2highlow(__half2 x) - { - return __half2_raw{ - _Float16_2{ - static_cast<__half2_raw>(x).data.y, - static_cast<__half2_raw>(x).data.x}}; - } - - // Bitcasts - inline - __device__ - short __half_as_short(__half x) - { - return static_cast<__half_raw>(x).x; - } - - inline - __device__ - unsigned short __half_as_ushort(__half x) - { - return static_cast<__half_raw>(x).x; - } - - inline - __device__ - __half __short_as_half(short x) - { - __half_raw r; r.x = x; - return r; - } - - inline - __device__ - __half __ushort_as_half(unsigned short x) - { - __half_raw r; r.x = x; - return r; - } - - // TODO: rounding behaviour is not correct. 
- // float -> half | half2 - inline - __device__ __host__ - __half __float2half(float x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ __host__ - __half __float2half_rn(float x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ __host__ - __half __float2half_rz(float x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ __host__ - __half __float2half_rd(float x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ __host__ - __half __float2half_ru(float x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ __host__ - __half2 __float2half2_rn(float x) - { - return __half2_raw{ - _Float16_2{ - static_cast<_Float16>(x), static_cast<_Float16>(x)}}; - } - inline - __device__ __host__ - __half2 __floats2half2_rn(float x, float y) - { - return __half2_raw{_Float16_2{ - static_cast<_Float16>(x), static_cast<_Float16>(y)}}; - } - inline - __device__ __host__ - __half2 __float22half2_rn(float2 x) - { - return __floats2half2_rn(x.x, x.y); - } - - // half | half2 -> float - inline - __device__ __host__ - float __half2float(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ __host__ - float __low2float(__half2 x) - { - return static_cast<__half2_raw>(x).data.x; - } - inline - __device__ __host__ - float __high2float(__half2 x) - { - return static_cast<__half2_raw>(x).data.y; - } - inline - __device__ __host__ - float2 __half22float2(__half2 x) - { - return make_float2( - static_cast<__half2_raw>(x).data.x, - static_cast<__half2_raw>(x).data.y); - } - - // half -> int - inline - __device__ - int __half2int_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - int __half2int_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - int __half2int_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - int __half2int_ru(__half 
x) - { - return static_cast<__half_raw>(x).data; - } - - // int -> half - inline - __device__ - __half __int2half_rn(int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __int2half_rz(int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __int2half_rd(int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __int2half_ru(int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // half -> short - inline - __device__ - short __half2short_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - short __half2short_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - short __half2short_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - short __half2short_ru(__half x) - { - return static_cast<__half_raw>(x).data; - } - - // short -> half - inline - __device__ - __half __short2half_rn(short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __short2half_rz(short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __short2half_rd(short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __short2half_ru(short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // half -> long long - inline - __device__ - long long __half2ll_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - long long __half2ll_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - long long __half2ll_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - long long __half2ll_ru(__half x) - { - return static_cast<__half_raw>(x).data; - } - - // long long -> half - inline - __device__ - __half __ll2half_rn(long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } 
- inline - __device__ - __half __ll2half_rz(long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ll2half_rd(long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ll2half_ru(long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // half -> unsigned int - inline - __device__ - unsigned int __half2uint_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned int __half2uint_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned int __half2uint_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned int __half2uint_ru(__half x) - { - return static_cast<__half_raw>(x).data; - } - - // unsigned int -> half - inline - __device__ - __half __uint2half_rn(unsigned int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __uint2half_rz(unsigned int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __uint2half_rd(unsigned int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __uint2half_ru(unsigned int x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // half -> unsigned short - inline - __device__ - unsigned short __half2ushort_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned short __half2ushort_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned short __half2ushort_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned short __half2ushort_ru(__half x) - { - return static_cast<__half_raw>(x).data; - } - - // unsigned short -> half - inline - __device__ - __half __ushort2half_rn(unsigned short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half 
__ushort2half_rz(unsigned short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ushort2half_rd(unsigned short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ushort2half_ru(unsigned short x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // half -> unsigned long long - inline - __device__ - unsigned long long __half2ull_rn(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned long long __half2ull_rz(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned long long __half2ull_rd(__half x) - { - return static_cast<__half_raw>(x).data; - } - inline - __device__ - unsigned long long __half2ull_ru(__half x) - { - return static_cast<__half_raw>(x).data; - } - - // unsigned long long -> half - inline - __device__ - __half __ull2half_rn(unsigned long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ull2half_rz(unsigned long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ull2half_rd(unsigned long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - inline - __device__ - __half __ull2half_ru(unsigned long long x) - { - return __half_raw{static_cast<_Float16>(x)}; - } - - // Load primitives - inline - __device__ - __half __ldg(const __half* ptr) { return *ptr; } - inline - __device__ - __half __ldcg(const __half* ptr) { return *ptr; } - inline - __device__ - __half __ldca(const __half* ptr) { return *ptr; } - inline - __device__ - __half __ldcs(const __half* ptr) { return *ptr; } - - inline - __device__ - __half2 __ldg(const __half2* ptr) { return *ptr; } - inline - __device__ - __half2 __ldcg(const __half2* ptr) { return *ptr; } - inline - __device__ - __half2 __ldca(const __half2* ptr) { return *ptr; } - inline - __device__ - __half2 __ldcs(const __half2* ptr) { return *ptr; } - - // Relations - 
inline - __device__ - bool __heq(__half x, __half y) - { - return static_cast<__half_raw>(x).data == - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hne(__half x, __half y) - { - return static_cast<__half_raw>(x).data != - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hle(__half x, __half y) - { - return static_cast<__half_raw>(x).data <= - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hge(__half x, __half y) - { - return static_cast<__half_raw>(x).data >= - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hlt(__half x, __half y) - { - return static_cast<__half_raw>(x).data < - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hgt(__half x, __half y) - { - return static_cast<__half_raw>(x).data > - static_cast<__half_raw>(y).data; - } - inline - __device__ - bool __hequ(__half x, __half y) { return __heq(x, y); } - inline - __device__ - bool __hneu(__half x, __half y) { return __hne(x, y); } - inline - __device__ - bool __hleu(__half x, __half y) { return __hle(x, y); } - inline - __device__ - bool __hgeu(__half x, __half y) { return __hge(x, y); } - inline - __device__ - bool __hltu(__half x, __half y) { return __hlt(x, y); } - inline - __device__ - bool __hgtu(__half x, __half y) { return __hgt(x, y); } - - inline - __device__ - __half2 __heq2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data == - static_cast<__half2_raw>(y).data; - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hne2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data != - static_cast<__half2_raw>(y).data; - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hle2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data <= - static_cast<__half2_raw>(y).data; - return 
__half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hge2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data >= - static_cast<__half2_raw>(y).data; - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hlt2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data < - static_cast<__half2_raw>(y).data; - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hgt2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(x).data > - static_cast<__half2_raw>(y).data; - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); } - inline - __device__ - __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); } - inline - __device__ - __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); } - inline - __device__ - __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); } - inline - __device__ - __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); } - inline - __device__ - __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); } - - inline - __device__ - bool __hbeq2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__heq2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - __device__ - bool __hbne2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hne2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - __device__ - bool __hble2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hle2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - __device__ - bool __hbge2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hge2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - 
__device__ - bool __hblt2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hlt2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - __device__ - bool __hbgt2(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hgt2(x, y)); - return r.data.x != 0 && r.data.y != 0; - } - inline - __device__ - bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); } - inline - __device__ - bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); } - inline - __device__ - bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); } - inline - __device__ - bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); } - inline - __device__ - bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); } - inline - __device__ - bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); } - - // Arithmetic - inline - __device__ - __half __clamp_01(__half x) - { - auto r = static_cast<__half_raw>(x); - - if (__hlt(x, __half_raw{0})) return __half_raw{0}; - if (__hlt(__half_raw{1}, x)) return __half_raw{1}; - return r; - } - - inline - __device__ - __half __hadd(__half x, __half y) - { - return __half_raw{ - static_cast<__half_raw>(x).data + - static_cast<__half_raw>(y).data}; - } - inline - __device__ - __half __hsub(__half x, __half y) - { - return __half_raw{ - static_cast<__half_raw>(x).data - - static_cast<__half_raw>(y).data}; - } - inline - __device__ - __half __hmul(__half x, __half y) - { - return __half_raw{ - static_cast<__half_raw>(x).data * - static_cast<__half_raw>(y).data}; - } - inline - __device__ - __half __hadd_sat(__half x, __half y) - { - return __clamp_01(__hadd(x, y)); - } - inline - __device__ - __half __hsub_sat(__half x, __half y) - { - return __clamp_01(__hsub(x, y)); - } - inline - __device__ - __half __hmul_sat(__half x, __half y) - { - return __clamp_01(__hmul(x, y)); - } - inline - __device__ - __half __hfma(__half x, __half y, __half z) - { - return __half_raw{__ocml_fma_f16( - static_cast<__half_raw>(x).data, - 
static_cast<__half_raw>(y).data, - static_cast<__half_raw>(z).data)}; - } - inline - __device__ - __half __hfma_sat(__half x, __half y, __half z) - { - return __clamp_01(__hfma(x, y, z)); - } - inline - __device__ - __half __hdiv(__half x, __half y) - { - return __half_raw{ - static_cast<__half_raw>(x).data / - static_cast<__half_raw>(y).data}; - } - - inline - __device__ - __half2 __hadd2(__half2 x, __half2 y) - { - return __half2_raw{ - static_cast<__half2_raw>(x).data + - static_cast<__half2_raw>(y).data}; - } - inline - __device__ - __half2 __hsub2(__half2 x, __half2 y) - { - return __half2_raw{ - static_cast<__half2_raw>(x).data - - static_cast<__half2_raw>(y).data}; - } - inline - __device__ - __half2 __hmul2(__half2 x, __half2 y) - { - return __half2_raw{ - static_cast<__half2_raw>(x).data * - static_cast<__half2_raw>(y).data}; - } - inline - __device__ - __half2 __hadd2_sat(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hadd2(x, y)); - return __half2{ - __clamp_01(__half_raw{r.data.x}), - __clamp_01(__half_raw{r.data.y})}; - } - inline - __device__ - __half2 __hsub2_sat(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hsub2(x, y)); - return __half2{ - __clamp_01(__half_raw{r.data.x}), - __clamp_01(__half_raw{r.data.y})}; - } - inline - __device__ - __half2 __hmul2_sat(__half2 x, __half2 y) - { - auto r = static_cast<__half2_raw>(__hmul2(x, y)); - return __half2{ - __clamp_01(__half_raw{r.data.x}), - __clamp_01(__half_raw{r.data.y})}; - } - inline - __device__ - __half2 __hfma2(__half2 x, __half2 y, __half2 z) - { - return __half2_raw{__ocml_fma_2f16(x, y, z)}; - } - inline - __device__ - __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z) - { - auto r = static_cast<__half2_raw>(__hfma2(x, y, z)); - return __half2{ - __clamp_01(__half_raw{r.data.x}), - __clamp_01(__half_raw{r.data.y})}; - } - inline - __device__ - __half2 __h2div(__half2 x, __half2 y) - { - return __half2_raw{ - static_cast<__half2_raw>(x).data / - 
static_cast<__half2_raw>(y).data}; - } - - // Math functions - #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ - inline - __device__ - float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) { - return __ockl_fdot2(static_cast<__half2_raw>(a).data, - static_cast<__half2_raw>(b).data, - c, saturate); - } - #endif - inline - __device__ - __half htrunc(__half x) - { - return __half_raw{ - __ocml_trunc_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hceil(__half x) - { - return __half_raw{ - __ocml_ceil_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hfloor(__half x) - { - return __half_raw{ - __ocml_floor_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hrint(__half x) - { - return __half_raw{ - __ocml_rint_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hsin(__half x) - { - return __half_raw{ - __ocml_sin_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hcos(__half x) - { - return __half_raw{ - __ocml_cos_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hexp(__half x) - { - return __half_raw{ - __ocml_exp_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hexp2(__half x) - { - return __half_raw{ - __ocml_exp2_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hexp10(__half x) - { - return __half_raw{ - __ocml_exp10_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hlog2(__half x) - { - return __half_raw{ - __ocml_log2_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hlog(__half x) - { - return __half_raw{ - __ocml_log_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hlog10(__half x) - { - return __half_raw{ - __ocml_log10_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hrcp(__half x) - { - return __half_raw{ - 
__llvm_amdgcn_rcp_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hrsqrt(__half x) - { - return __half_raw{ - __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - __half hsqrt(__half x) - { - return __half_raw{ - __ocml_sqrt_f16(static_cast<__half_raw>(x).data)}; - } - inline - __device__ - bool __hisinf(__half x) - { - return __ocml_isinf_f16(static_cast<__half_raw>(x).data); - } - inline - __device__ - bool __hisnan(__half x) - { - return __ocml_isnan_f16(static_cast<__half_raw>(x).data); - } - inline - __device__ - __half __hneg(__half x) - { - return __half_raw{-static_cast<__half_raw>(x).data}; - } - - inline - __device__ - __half2 h2trunc(__half2 x) - { - return __half2_raw{__ocml_trunc_2f16(x)}; - } - inline - __device__ - __half2 h2ceil(__half2 x) - { - return __half2_raw{__ocml_ceil_2f16(x)}; - } - inline - __device__ - __half2 h2floor(__half2 x) - { - return __half2_raw{__ocml_floor_2f16(x)}; - } - inline - __device__ - __half2 h2rint(__half2 x) - { - return __half2_raw{__ocml_rint_2f16(x)}; - } - inline - __device__ - __half2 h2sin(__half2 x) - { - return __half2_raw{__ocml_sin_2f16(x)}; - } - inline - __device__ - __half2 h2cos(__half2 x) - { - return __half2_raw{__ocml_cos_2f16(x)}; - } - inline - __device__ - __half2 h2exp(__half2 x) - { - return __half2_raw{__ocml_exp_2f16(x)}; - } - inline - __device__ - __half2 h2exp2(__half2 x) - { - return __half2_raw{__ocml_exp2_2f16(x)}; - } - inline - __device__ - __half2 h2exp10(__half2 x) - { - return __half2_raw{__ocml_exp10_2f16(x)}; - } - inline - __device__ - __half2 h2log2(__half2 x) - { - return __half2_raw{__ocml_log2_2f16(x)}; - } - inline - __device__ - __half2 h2log(__half2 x) { return __ocml_log_2f16(x); } - inline - __device__ - __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); } - inline - __device__ - __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); } - inline - __device__ - __half2 h2rsqrt(__half2 x) { return 
__ocml_rsqrt_2f16(x); } - inline - __device__ - __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); } - inline - __device__ - __half2 __hisinf2(__half2 x) - { - auto r = __ocml_isinf_2f16(x); - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hisnan2(__half2 x) - { - auto r = __ocml_isnan_2f16(x); - return __half2_raw{_Float16_2{ - static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; - } - inline - __device__ - __half2 __hneg2(__half2 x) - { - return __half2_raw{-static_cast<__half2_raw>(x).data}; - } - } // Anonymous namespace. - - #if !defined(HIP_NO_HALF) - using half = __half; - using half2 = __half2; - #endif - #endif // defined(__cplusplus) -#elif defined(__GNUC__) - #include "hip_fp16_gcc.h" -#endif // !defined(__clang__) && defined(__GNUC__) diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h deleted file mode 100644 index 9b31f9e3c..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_gcc.h +++ /dev/null @@ -1,257 +0,0 @@ -#pragma once - -#if defined(__cplusplus) - #include -#endif - -struct __half_raw { - unsigned short x; -}; - -struct __half2_raw { - unsigned short x; - unsigned short y; -}; - -#if defined(__cplusplus) - struct __half; - - __half __float2half(float); - float __half2float(__half); - - // BEGIN STRUCT __HALF - struct __half { - protected: - unsigned short __x; - public: - // CREATORS - __half() = default; - __half(const __half_raw& x) : __x{x.x} {} - #if !defined(__HIP_NO_HALF_CONVERSIONS__) - __half(float x) : __x{__float2half(x).__x} {} - __half(double x) : __x{__float2half(x).__x} {} - #endif - __half(const __half&) = default; - __half(__half&&) = default; - ~__half() = default; - - // MANIPULATORS - __half& operator=(const __half&) = default; - __half& operator=(__half&&) = default; - __half& operator=(const __half_raw& x) { __x = x.x; return *this; } - #if 
!defined(__HIP_NO_HALF_CONVERSIONS__) - __half& operator=(float x) - { - __x = __float2half(x).__x; - return *this; - } - __half& operator=(double x) - { - return *this = static_cast(x); - } - #endif - - // ACCESSORS - operator float() const { return __half2float(*this); } - operator __half_raw() const { return __half_raw{__x}; } - }; - // END STRUCT __HALF - - // BEGIN STRUCT __HALF2 - struct __half2 { - protected: - __half x; - __half y; - public: - // CREATORS - __half2() = default; - __half2(const __half2_raw& ix) - : - x{reinterpret_cast(ix.x)}, - y{reinterpret_cast(ix.y)} - {} - __half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {} - __half2(const __half2&) = default; - __half2(__half2&&) = default; - ~__half2() = default; - - // MANIPULATORS - __half2& operator=(const __half2&) = default; - __half2& operator=(__half2&&) = default; - __half2& operator=(const __half2_raw& ix) - { - x = reinterpret_cast(ix.x); - y = reinterpret_cast(ix.y); - return *this; - } - - // ACCESSORS - operator __half2_raw() const - { - return __half2_raw{ - reinterpret_cast(x), - reinterpret_cast(y)}; - } - }; - // END STRUCT __HALF2 - - namespace - { - inline - unsigned short __internal_float2half( - float flt, unsigned int& sgn, unsigned int& rem) - { - unsigned int x{}; - std::memcpy(&x, &flt, sizeof(flt)); - - unsigned int u = (x & 0x7fffffffU); - sgn = ((x >> 16) & 0x8000U); - - // NaN/+Inf/-Inf - if (u >= 0x7f800000U) { - rem = 0; - return static_cast( - (u == 0x7f800000U) ? 
(sgn | 0x7c00U) : 0x7fffU); - } - // Overflows - if (u > 0x477fefffU) { - rem = 0x80000000U; - return static_cast(sgn | 0x7bffU); - } - // Normal numbers - if (u >= 0x38800000U) { - rem = u << 19; - u -= 0x38000000U; - return static_cast(sgn | (u >> 13)); - } - // +0/-0 - if (u < 0x33000001U) { - rem = u; - return static_cast(sgn); - } - // Denormal numbers - unsigned int exponent = u >> 23; - unsigned int mantissa = (u & 0x7fffffU); - unsigned int shift = 0x7eU - exponent; - mantissa |= 0x800000U; - rem = mantissa << (32 - shift); - return static_cast(sgn | (mantissa >> shift)); - } - - inline - __half __float2half(float x) - { - __half_raw r; - unsigned int sgn{}; - unsigned int rem{}; - r.x = __internal_float2half(x, sgn, rem); - if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x; - - return r; - } - - inline - __half __float2half_rn(float x) { return __float2half(x); } - - inline - __half __float2half_rz(float x) - { - __half_raw r; - unsigned int sgn{}; - unsigned int rem{}; - r.x = __internal_float2half(x, sgn, rem); - - return r; - } - - inline - __half __float2half_rd(float x) - { - __half_raw r; - unsigned int sgn{}; - unsigned int rem{}; - r.x = __internal_float2half(x, sgn, rem); - if (rem && sgn) ++r.x; - - return r; - } - - inline - __half __float2half_ru(float x) - { - __half_raw r; - unsigned int sgn{}; - unsigned int rem{}; - r.x = __internal_float2half(x, sgn, rem); - if (rem && !sgn) ++r.x; - - return r; - } - - inline - __half2 __float2half2_rn(float x) - { - return __half2{__float2half_rn(x), __float2half_rn(x)}; - } - - inline - __half2 __floats2half2_rn(float x, float y) - { - return __half2{__float2half_rn(x), __float2half_rn(y)}; - } - - inline - float __internal_half2float(unsigned short x) - { - unsigned int sign = ((x >> 15) & 1); - unsigned int exponent = ((x >> 10) & 0x1f); - unsigned int mantissa = ((x & 0x3ff) << 13); - - if (exponent == 0x1fU) { /* NaN or Inf */ - mantissa = (mantissa ? 
(sign = 0, 0x7fffffU) : 0); - exponent = 0xffU; - } else if (!exponent) { /* Denorm or Zero */ - if (mantissa) { - unsigned int msb; - exponent = 0x71U; - do { - msb = (mantissa & 0x400000U); - mantissa <<= 1; /* normalize */ - --exponent; - } while (!msb); - mantissa &= 0x7fffffU; /* 1.mantissa is implicit */ - } - } else { - exponent += 0x70U; - } - unsigned int u = ((sign << 31) | (exponent << 23) | mantissa); - float f; - memcpy(&f, &u, sizeof(u)); - - return f; - } - - inline - float __half2float(__half x) - { - return __internal_half2float(static_cast<__half_raw>(x).x); - } - - inline - float __low2float(__half2 x) - { - return __internal_half2float(static_cast<__half2_raw>(x).x); - } - - inline - float __high2float(__half2 x) - { - return __internal_half2float(static_cast<__half2_raw>(x).y); - } - } // Anonymous namespace. - - #if !defined(HIP_NO_HALF) - using half = __half; - using half2 = __half2; - #endif -#endif // defined(__cplusplus) diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h b/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h deleted file mode 100644 index eeb617c40..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_fp16_math_fwd.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -// /* -// Half Math Functions -// */ - -#include "host_defines.h" - -extern "C" -{ - __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); - __device__ _Float16 __ocml_cos_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); - __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); - __device__ __attribute__((const)) - _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); - __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); - __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); - __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); - __device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16); - __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); - __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); - __device__ _Float16 __ocml_sin_f16(_Float16); - __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); - __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); - - typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); - typedef short __2i16 __attribute__((ext_vector_type(2))); - - #if (__hcc_workweek__ >= 
19015) || __HIP_CLANG_ONLY__ - __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); - #endif - - __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); - __device__ __2f16 __ocml_cos_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); - __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); - __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); - __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); - __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); - __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); - __device__ inline - __2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL. - { - return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)}; - } - __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); - __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); - __device__ __2f16 __ocml_sin_2f16(__2f16); - __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); - __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); -} diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h b/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h deleted file mode 100644 index a5b80b0a1..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_ldg.h +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H - -#if defined(__HCC_OR_HIP_CLANG__) -#if __hcc_workweek__ >= 16164 || defined(__HIP_CLANG_ONLY__) -#include "hip_vector_types.h" -#include "host_defines.h" - -__device__ inline static char __ldg(const char* ptr) { return *ptr; } - -__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; } - -__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; } - -__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; } - -__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } - - -__device__ inline static short __ldg(const short* ptr) { return ptr[0]; } - -__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; } - -__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; } - -__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } - - -__device__ inline static int __ldg(const int* ptr) { return ptr[0]; } - -__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; } - -__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; } - -__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } - - -__device__ inline static long __ldg(const long* ptr) { return ptr[0]; } - -__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } - - -__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; } - -__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } - -__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } - - -__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } - -__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } - - -__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } - - 
-__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; } - -__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; } - - -__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } - - -__device__ inline static float __ldg(const float* ptr) { return ptr[0]; } - -__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; } - -__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; } - - -__device__ inline static double __ldg(const double* ptr) { return ptr[0]; } - -__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; } - -#endif // __hcc_workweek__ || defined(__HIP_CLANG_ONLY__) - -#endif // defined(__HCC_OR_HIP_CLANG__) - -#endif // HIP_LDG_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_memory.h b/src/utils/amd_hip/hip/hcc_detail/hip_memory.h deleted file mode 100644 index 866b9e879..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_memory.h +++ /dev/null @@ -1,114 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H - -// Implementation of malloc and free device functions. -// HIP heap is implemented as a global array with fixed size. Users may define -// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. - -#if __HCC__ || __HIP__ - -// Size of page in bytes. -#ifndef __HIP_SIZE_OF_PAGE -#define __HIP_SIZE_OF_PAGE 64 -#endif - -// Total number of pages -#ifndef __HIP_NUM_PAGES -#define __HIP_NUM_PAGES (16 * 64 * 64) -#endif - -#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) - -#if __HIP__ && __HIP_DEVICE_COMPILE__ -__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; -__attribute__((weak)) __device__ - uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; -#else -extern __device__ char __hip_device_heap[]; -extern __device__ uint32_t __hip_device_page_flag[]; -#endif - -extern "C" inline __device__ void* __hip_malloc(size_t size) { - char* heap = (char*)__hip_device_heap; - if (size > __HIP_SIZE_OF_HEAP) { - return (void*)nullptr; - } - uint32_t totalThreads = - hipBlockDim_x * hipGridDim_x * hipBlockDim_y - * hipGridDim_y * hipBlockDim_z * hipGridDim_z; - uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x - + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x - + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x - * hipBlockDim_y; - - uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads; - uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads; - - uint32_t stride = size / __HIP_SIZE_OF_PAGE; - uint32_t start = numHeapsPerWorkItem * currentWorkItem; - - uint32_t k = 0; - - while 
(__hip_device_page_flag[k] > 0) { - k++; - } - - for (uint32_t i = 0; i < stride - 1; i++) { - __hip_device_page_flag[i + start + k] = 1; - } - - __hip_device_page_flag[start + stride - 1 + k] = 2; - - void* ptr = (void*)(heap - + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE); - - return ptr; -} - -extern "C" inline __device__ void* __hip_free(void* ptr) { - if (ptr == nullptr) { - return nullptr; - } - - uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap; - uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE; - - while (__hip_device_page_flag[offsetPage] != 0) { - if (__hip_device_page_flag[offsetPage] == 2) { - __hip_device_page_flag[offsetPage] = 0; - offsetPage++; - break; - } else { - __hip_device_page_flag[offsetPage] = 0; - offsetPage++; - } - } - - return nullptr; -} - -#endif - -#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h b/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h deleted file mode 100644 index eb3112bdb..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_prof_api.h +++ /dev/null @@ -1,204 +0,0 @@ -// automatically generated sources -#ifndef _HIP_PROF_API_H -#define _HIP_PROF_API_H - -#include -#include -#include - -#include "hip/hcc_detail/hip_prof_str.h" - -template -class api_callbacks_table_templ { - public: - typedef std::recursive_mutex mutex_t; - - typedef Record record_t; - typedef Fun fun_t; - typedef Act act_t; - - // HIP API callbacks table - struct hip_cb_table_entry_t { - volatile std::atomic sync; - volatile std::atomic sem; - act_t act; - void* a_arg; - fun_t fun; - void* arg; - }; - - struct hip_cb_table_t { - hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; - }; - - api_callbacks_table_templ() { - memset(&callbacks_table_, 0, sizeof(callbacks_table_)); - } - - bool set_activity(uint32_t id, act_t fun, void* arg) { - std::lock_guard lock(mutex_); - bool ret = true; - if (id == HIP_API_ID_ANY) { - for (unsigned i = 0; i < 
HIP_API_ID_NUMBER; ++i) set_activity(i, fun, arg); - } else if (id < HIP_API_ID_NUMBER) { - cb_sync(id); - callbacks_table_.arr[id].act = fun; - callbacks_table_.arr[id].a_arg = arg; - cb_release(id); - } else { - ret = false; - } - return ret; - } - - bool set_callback(uint32_t id, fun_t fun, void* arg) { - std::lock_guard lock(mutex_); - bool ret = true; - if (id == HIP_API_ID_ANY) { - for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_callback(i, fun, arg); - } else if (id < HIP_API_ID_NUMBER) { - cb_sync(id); - callbacks_table_.arr[id].fun = fun; - callbacks_table_.arr[id].arg = arg; - cb_release(id); - } else { - ret = false; - } - return ret; - } - - inline hip_cb_table_entry_t& entry(const uint32_t& id) { - return callbacks_table_.arr[id]; - } - - inline void sem_sync(const uint32_t& id) { - sem_increment(id); - if (entry(id).sync.load() == true) sync_wait(id); - } - - inline void sem_release(const uint32_t& id) { - sem_decrement(id); - } - - private: - inline void cb_sync(const uint32_t& id) { - entry(id).sync.store(true); - while (entry(id).sem.load() != 0) {} - } - - inline void cb_release(const uint32_t& id) { - entry(id).sync.store(false); - } - - inline void sem_increment(const uint32_t& id) { - const uint32_t prev = entry(id).sem.fetch_add(1); - if (prev == UINT32_MAX) { - std::cerr << "sem overflow id = " << id << std::endl << std::flush; - abort(); - } - } - - inline void sem_decrement(const uint32_t& id) { - const uint32_t prev = entry(id).sem.fetch_sub(1); - if (prev == 0) { - std::cerr << "sem corrupted id = " << id << std::endl << std::flush; - abort(); - } - } - - void sync_wait(const uint32_t& id) { - sem_decrement(id); - while (entry(id).sync.load() == true) {} - sem_increment(id); - } - - mutex_t mutex_; - hip_cb_table_t callbacks_table_; -}; - - -#if USE_PROF_API -#include - -static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; -typedef activity_record_t hip_api_record_t; -typedef activity_rtapi_callback_t hip_api_callback_t; 
-typedef activity_sync_callback_t hip_act_callback_t; - -// HIP API callbacks spawner object macro -#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ - hip_api_data_t api_data{}; \ - INIT_CB_ARGS_DATA(CB_ID, api_data); \ - api_callbacks_spawner_t __api_tracer(HIP_API_ID_##CB_ID, api_data); - -typedef api_callbacks_table_templ api_callbacks_table_t; -extern api_callbacks_table_t callbacks_table; - -template -class api_callbacks_spawner_t { - public: - api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) : - api_data_(api_data), - record_({}) - { - if (cid_ >= HIP_API_ID_NUMBER) { - fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); - abort(); - } - callbacks_table.sem_sync(cid_); - - act = entry(cid_).act; - a_arg = entry(cid_).a_arg; - fun = entry(cid_).fun; - arg = entry(cid_).arg; - - api_data_.phase = 0; - if (act != NULL) act(cid_, &record_, &api_data_, a_arg); - if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg); - } - - ~api_callbacks_spawner_t() { - api_data_.phase = 1; - if (act != NULL) act(cid_, &record_, &api_data_, a_arg); - if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg); - - callbacks_table.sem_release(cid_); - } - - private: - inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { - return callbacks_table.entry(id); - } - - hip_api_data_t& api_data_; - hip_api_record_t record_; - - hip_act_callback_t act; - void* a_arg; - hip_api_callback_t fun; - void* arg; -}; - -template <> -class api_callbacks_spawner_t { - public: - api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) {} -}; - -#else - -#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) - -class api_callbacks_table_t { - public: - typedef void* act_t; - typedef void* fun_t; - bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } - bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } -}; - -#endif - -#endif // _HIP_PROF_API_H diff --git 
a/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h b/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h deleted file mode 100644 index 3a0d3e08e..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_prof_str.h +++ /dev/null @@ -1,2512 +0,0 @@ -// automatically generated sources -#ifndef _HIP_PROF_STR_H -#define _HIP_PROF_STR_H -#include -#include - -// Dummy API callbacks definition -#define INIT_NONE_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipHccGetAccelerator_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipHccGetAcceleratorView_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipDeviceCanAccessPeer2_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipMemcpyPeer2_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipMemcpyPeerAsync2_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) {}; 
-#define INIT_hipMemcpyHtoH_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetErrorName_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) {}; -#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) {}; - -// HIP API callbacks ID enumaration -enum hip_api_id_t { - HIP_API_ID_hipHostFree = 0, - HIP_API_ID_hipMemcpyToSymbolAsync = 1, - HIP_API_ID_hipMallocPitch = 2, - HIP_API_ID_hipMalloc = 3, - HIP_API_ID_hipDeviceGetName = 4, - HIP_API_ID_hipEventRecord = 5, - HIP_API_ID_hipCtxSynchronize = 6, - HIP_API_ID_hipSetDevice = 7, - HIP_API_ID_hipSetupArgument = 8, - HIP_API_ID_hipMemcpyFromSymbolAsync = 9, - HIP_API_ID_hipMemcpyDtoD = 10, - HIP_API_ID_hipMemcpy2DToArray = 11, - HIP_API_ID_hipCtxGetCacheConfig = 12, - HIP_API_ID_hipStreamWaitEvent = 13, - HIP_API_ID_hipModuleLoad = 14, - HIP_API_ID_hipDevicePrimaryCtxSetFlags = 15, - HIP_API_ID_hipMemcpyAsync = 16, - HIP_API_ID_hipMalloc3DArray = 17, - HIP_API_ID_hipStreamCreate = 18, - HIP_API_ID_hipCtxGetCurrent = 19, - HIP_API_ID_hipDevicePrimaryCtxGetState = 20, - HIP_API_ID_hipEventQuery = 21, - HIP_API_ID_hipEventCreate = 22, - HIP_API_ID_hipMemGetAddressRange = 23, - HIP_API_ID_hipMemcpyFromSymbol = 24, - HIP_API_ID_hipArrayCreate = 25, - HIP_API_ID_hipStreamGetFlags = 26, - HIP_API_ID_hipMallocArray = 27, - HIP_API_ID_hipCtxGetSharedMemConfig = 28, - HIP_API_ID_hipMemPtrGetInfo = 29, - HIP_API_ID_hipCtxGetFlags = 30, - HIP_API_ID_hipStreamDestroy = 31, - HIP_API_ID_hipMemset3DAsync = 32, - HIP_API_ID_hipMemcpy3D = 33, - HIP_API_ID_hipInit = 34, - HIP_API_ID_hipMemcpyAtoH = 35, - HIP_API_ID_hipMemset2D = 36, - 
HIP_API_ID_hipMemset2DAsync = 37, - HIP_API_ID_hipDeviceCanAccessPeer = 38, - HIP_API_ID_hipDeviceEnablePeerAccess = 39, - HIP_API_ID_hipModuleUnload = 40, - HIP_API_ID_hipHostUnregister = 41, - HIP_API_ID_hipProfilerStop = 42, - HIP_API_ID_hipLaunchByPtr = 43, - HIP_API_ID_hipStreamSynchronize = 44, - HIP_API_ID_hipFreeHost = 45, - HIP_API_ID_hipRemoveApiCallback = 46, - HIP_API_ID_hipDeviceSetCacheConfig = 47, - HIP_API_ID_hipCtxGetApiVersion = 48, - HIP_API_ID_hipMemcpyHtoD = 49, - HIP_API_ID_hipModuleGetGlobal = 50, - HIP_API_ID_hipMemcpyHtoA = 51, - HIP_API_ID_hipCtxCreate = 52, - HIP_API_ID_hipMemcpy2D = 53, - HIP_API_ID_hipIpcCloseMemHandle = 54, - HIP_API_ID_hipChooseDevice = 55, - HIP_API_ID_hipDeviceSetSharedMemConfig = 56, - HIP_API_ID_hipDeviceComputeCapability = 57, - HIP_API_ID_hipRegisterApiCallback = 58, - HIP_API_ID_hipDeviceGet = 59, - HIP_API_ID_hipProfilerStart = 60, - HIP_API_ID_hipCtxSetCacheConfig = 61, - HIP_API_ID_hipFuncSetCacheConfig = 62, - HIP_API_ID_hipMemcpyPeerAsync = 63, - HIP_API_ID_hipEventElapsedTime = 64, - HIP_API_ID_hipDevicePrimaryCtxReset = 65, - HIP_API_ID_hipEventDestroy = 66, - HIP_API_ID_hipCtxPopCurrent = 67, - HIP_API_ID_hipHostGetFlags = 68, - HIP_API_ID_hipHostMalloc = 69, - HIP_API_ID_hipDriverGetVersion = 70, - HIP_API_ID_hipMemGetInfo = 71, - HIP_API_ID_hipDeviceReset = 72, - HIP_API_ID_hipMemset = 73, - HIP_API_ID_hipMemsetD8 = 74, - HIP_API_ID_hipHostRegister = 75, - HIP_API_ID_hipCtxSetSharedMemConfig = 76, - HIP_API_ID_hipArray3DCreate = 77, - HIP_API_ID_hipIpcOpenMemHandle = 78, - HIP_API_ID_hipGetLastError = 79, - HIP_API_ID_hipCtxDestroy = 80, - HIP_API_ID_hipDeviceGetSharedMemConfig = 81, - HIP_API_ID_hipRegisterActivityCallback = 82, - HIP_API_ID_hipSetDeviceFlags = 83, - HIP_API_ID_hipFree = 84, - HIP_API_ID_hipDeviceGetAttribute = 85, - HIP_API_ID_hipMemcpyDtoH = 86, - HIP_API_ID_hipCtxDisablePeerAccess = 87, - HIP_API_ID_hipDeviceGetByPCIBusId = 88, - HIP_API_ID_hipIpcGetMemHandle = 89, - 
HIP_API_ID_hipMemcpyHtoDAsync = 90, - HIP_API_ID_hipCtxGetDevice = 91, - HIP_API_ID_hipMemset3D = 92, - HIP_API_ID_hipModuleLoadData = 93, - HIP_API_ID_hipDeviceTotalMem = 94, - HIP_API_ID_hipCtxSetCurrent = 95, - HIP_API_ID_hipMallocHost = 96, - HIP_API_ID_hipDevicePrimaryCtxRetain = 97, - HIP_API_ID_hipDeviceDisablePeerAccess = 98, - HIP_API_ID_hipStreamCreateWithFlags = 99, - HIP_API_ID_hipMemcpyFromArray = 100, - HIP_API_ID_hipMemcpy2DAsync = 101, - HIP_API_ID_hipFuncGetAttributes = 102, - HIP_API_ID_hipEventCreateWithFlags = 103, - HIP_API_ID_hipStreamQuery = 104, - HIP_API_ID_hipDeviceGetPCIBusId = 105, - HIP_API_ID_hipMemcpy = 106, - HIP_API_ID_hipPeekAtLastError = 107, - HIP_API_ID_hipHostAlloc = 108, - HIP_API_ID_hipStreamAddCallback = 109, - HIP_API_ID_hipMemcpyToArray = 110, - HIP_API_ID_hipDeviceSynchronize = 111, - HIP_API_ID_hipDeviceGetCacheConfig = 112, - HIP_API_ID_hipMalloc3D = 113, - HIP_API_ID_hipPointerGetAttributes = 114, - HIP_API_ID_hipMemsetAsync = 115, - HIP_API_ID_hipMemcpyToSymbol = 116, - HIP_API_ID_hipCtxPushCurrent = 117, - HIP_API_ID_hipMemcpyPeer = 118, - HIP_API_ID_hipEventSynchronize = 119, - HIP_API_ID_hipMemcpyDtoDAsync = 120, - HIP_API_ID_hipCtxEnablePeerAccess = 121, - HIP_API_ID_hipMemcpyDtoHAsync = 122, - HIP_API_ID_hipModuleLaunchKernel = 123, - HIP_API_ID_hipModuleGetTexRef = 124, - HIP_API_ID_hipRemoveActivityCallback = 125, - HIP_API_ID_hipDeviceGetLimit = 126, - HIP_API_ID_hipModuleLoadDataEx = 127, - HIP_API_ID_hipRuntimeGetVersion = 128, - HIP_API_ID_hipGetDeviceProperties = 129, - HIP_API_ID_hipFreeArray = 130, - HIP_API_ID_hipDevicePrimaryCtxRelease = 131, - HIP_API_ID_hipHostGetDevicePointer = 132, - HIP_API_ID_hipMemcpyParam2D = 133, - HIP_API_ID_hipConfigureCall = 134, - HIP_API_ID_hipModuleGetFunction = 135, - HIP_API_ID_hipGetDevice = 136, - HIP_API_ID_hipGetDeviceCount = 137, - HIP_API_ID_hipHccModuleLaunchKernel = 138, - HIP_API_ID_NUMBER = 139, - HIP_API_ID_ANY = 140, - - HIP_API_ID_NONE = HIP_API_ID_NUMBER, 
- HIP_API_ID_hipHccGetAccelerator = HIP_API_ID_NUMBER, - HIP_API_ID_hipHccGetAcceleratorView = HIP_API_ID_NUMBER, - HIP_API_ID_hipDeviceCanAccessPeer2 = HIP_API_ID_NUMBER, - HIP_API_ID_hipMemcpyPeer2 = HIP_API_ID_NUMBER, - HIP_API_ID_hipMemcpyPeerAsync2 = HIP_API_ID_NUMBER, - HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NUMBER, - HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NUMBER, - HIP_API_ID_hipBindTexture = HIP_API_ID_NUMBER, - HIP_API_ID_hipBindTexture2D = HIP_API_ID_NUMBER, - HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NUMBER, - HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NUMBER, - HIP_API_ID_hipUnbindTexture = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetChannelDesc = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetTextureAlignmentOffset = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetTextureReference = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetFormat = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetFlags = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetArray = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetAddress = HIP_API_ID_NUMBER, - HIP_API_ID_hipTexRefSetAddress2D = HIP_API_ID_NUMBER, - HIP_API_ID_hipMemcpyHtoH = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetErrorName = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetErrorString = HIP_API_ID_NUMBER, - HIP_API_ID_hipCreateSurfaceObject = HIP_API_ID_NUMBER, - HIP_API_ID_hipDestroySurfaceObject = HIP_API_ID_NUMBER, - HIP_API_ID_hipStreamCreateWithPriority = HIP_API_ID_NUMBER, - HIP_API_ID_hipDeviceGetStreamPriorityRange = HIP_API_ID_NUMBER, - HIP_API_ID_hipStreamGetPriority = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetSymbolAddress = HIP_API_ID_NUMBER, - HIP_API_ID_hipGetSymbolSize = HIP_API_ID_NUMBER, -}; - -// Return HIP API 
string -static const char* hip_api_name(const uint32_t& id) { - switch(id) { - case HIP_API_ID_hipHostFree: return "hipHostFree"; - case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync"; - case HIP_API_ID_hipMallocPitch: return "hipMallocPitch"; - case HIP_API_ID_hipMalloc: return "hipMalloc"; - case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName"; - case HIP_API_ID_hipEventRecord: return "hipEventRecord"; - case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize"; - case HIP_API_ID_hipSetDevice: return "hipSetDevice"; - case HIP_API_ID_hipSetupArgument: return "hipSetupArgument"; - case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync"; - case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD"; - case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray"; - case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig"; - case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent"; - case HIP_API_ID_hipModuleLoad: return "hipModuleLoad"; - case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags"; - case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync"; - case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray"; - case HIP_API_ID_hipStreamCreate: return "hipStreamCreate"; - case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent"; - case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState"; - case HIP_API_ID_hipEventQuery: return "hipEventQuery"; - case HIP_API_ID_hipEventCreate: return "hipEventCreate"; - case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange"; - case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol"; - case HIP_API_ID_hipArrayCreate: return "hipArrayCreate"; - case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags"; - case HIP_API_ID_hipMallocArray: return "hipMallocArray"; - case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig"; - case HIP_API_ID_hipMemPtrGetInfo: return 
"hipMemPtrGetInfo"; - case HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags"; - case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy"; - case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync"; - case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D"; - case HIP_API_ID_hipInit: return "hipInit"; - case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH"; - case HIP_API_ID_hipMemset2D: return "hipMemset2D"; - case HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync"; - case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer"; - case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess"; - case HIP_API_ID_hipModuleUnload: return "hipModuleUnload"; - case HIP_API_ID_hipHostUnregister: return "hipHostUnregister"; - case HIP_API_ID_hipProfilerStop: return "hipProfilerStop"; - case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr"; - case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize"; - case HIP_API_ID_hipFreeHost: return "hipFreeHost"; - case HIP_API_ID_hipRemoveApiCallback: return "hipRemoveApiCallback"; - case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig"; - case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion"; - case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD"; - case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal"; - case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA"; - case HIP_API_ID_hipCtxCreate: return "hipCtxCreate"; - case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D"; - case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle"; - case HIP_API_ID_hipChooseDevice: return "hipChooseDevice"; - case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig"; - case HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability"; - case HIP_API_ID_hipRegisterApiCallback: return "hipRegisterApiCallback"; - case HIP_API_ID_hipDeviceGet: return "hipDeviceGet"; - case HIP_API_ID_hipProfilerStart: return 
"hipProfilerStart"; - case HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig"; - case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig"; - case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync"; - case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime"; - case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset"; - case HIP_API_ID_hipEventDestroy: return "hipEventDestroy"; - case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent"; - case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags"; - case HIP_API_ID_hipHostMalloc: return "hipHostMalloc"; - case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion"; - case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo"; - case HIP_API_ID_hipDeviceReset: return "hipDeviceReset"; - case HIP_API_ID_hipMemset: return "hipMemset"; - case HIP_API_ID_hipMemsetD8: return "hipMemsetD8"; - case HIP_API_ID_hipHostRegister: return "hipHostRegister"; - case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig"; - case HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate"; - case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle"; - case HIP_API_ID_hipGetLastError: return "hipGetLastError"; - case HIP_API_ID_hipCtxDestroy: return "hipCtxDestroy"; - case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig"; - case HIP_API_ID_hipRegisterActivityCallback: return "hipRegisterActivityCallback"; - case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags"; - case HIP_API_ID_hipFree: return "hipFree"; - case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute"; - case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH"; - case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess"; - case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId"; - case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle"; - case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync"; - 
case HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice"; - case HIP_API_ID_hipMemset3D: return "hipMemset3D"; - case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData"; - case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem"; - case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent"; - case HIP_API_ID_hipMallocHost: return "hipMallocHost"; - case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain"; - case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess"; - case HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags"; - case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray"; - case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync"; - case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes"; - case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags"; - case HIP_API_ID_hipStreamQuery: return "hipStreamQuery"; - case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId"; - case HIP_API_ID_hipMemcpy: return "hipMemcpy"; - case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError"; - case HIP_API_ID_hipHostAlloc: return "hipHostAlloc"; - case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback"; - case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray"; - case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize"; - case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig"; - case HIP_API_ID_hipMalloc3D: return "hipMalloc3D"; - case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes"; - case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync"; - case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol"; - case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent"; - case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer"; - case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; - case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync"; - case 
HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess"; - case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync"; - case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel"; - case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef"; - case HIP_API_ID_hipRemoveActivityCallback: return "hipRemoveActivityCallback"; - case HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit"; - case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx"; - case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion"; - case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties"; - case HIP_API_ID_hipFreeArray: return "hipFreeArray"; - case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease"; - case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer"; - case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D"; - case HIP_API_ID_hipConfigureCall: return "hipConfigureCall"; - case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction"; - case HIP_API_ID_hipGetDevice: return "hipGetDevice"; - case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount"; - }; - return "unknown"; -}; - -// HIP API callbacks data structure -struct hip_api_data_t { - uint64_t correlation_id; - uint32_t phase; - union { - struct { - void* ptr; - } hipHostFree; - struct { - const void* symbolName; - const void* src; - size_t sizeBytes; - size_t offset; - hipMemcpyKind kind; - hipStream_t stream; - } hipMemcpyToSymbolAsync; - struct { - void** ptr; - size_t* pitch; - size_t width; - size_t height; - } hipMallocPitch; - struct { - void** ptr; - size_t size; - } hipMalloc; - struct { - char* name; - int len; - hipDevice_t device; - } hipDeviceGetName; - struct { - hipEvent_t event; - hipStream_t stream; - } hipEventRecord; - struct { - int deviceId; - } hipSetDevice; - struct { - const void* arg; - size_t size; - size_t offset; - } hipSetupArgument; - struct { - void* dst; - const void* 
symbolName; - size_t sizeBytes; - size_t offset; - hipMemcpyKind kind; - hipStream_t stream; - } hipMemcpyFromSymbolAsync; - struct { - hipDeviceptr_t dst; - hipDeviceptr_t src; - size_t sizeBytes; - } hipMemcpyDtoD; - struct { - hipArray* dst; - size_t wOffset; - size_t hOffset; - const void* src; - size_t spitch; - size_t width; - size_t height; - hipMemcpyKind kind; - } hipMemcpy2DToArray; - struct { - hipFuncCache_t* cacheConfig; - } hipCtxGetCacheConfig; - struct { - hipStream_t stream; - hipEvent_t event; - unsigned int flags; - } hipStreamWaitEvent; - struct { - hipModule_t* module; - const char* fname; - } hipModuleLoad; - struct { - hipDevice_t dev; - unsigned int flags; - } hipDevicePrimaryCtxSetFlags; - struct { - void* dst; - const void* src; - size_t sizeBytes; - hipMemcpyKind kind; - hipStream_t stream; - } hipMemcpyAsync; - struct { - hipArray** array; - const hipChannelFormatDesc* desc; - hipExtent extent; - unsigned int flags; - } hipMalloc3DArray; - struct { - hipStream_t* stream; - } hipStreamCreate; - struct { - hipCtx_t* ctx; - } hipCtxGetCurrent; - struct { - hipDevice_t dev; - unsigned int* flags; - int* active; - } hipDevicePrimaryCtxGetState; - struct { - hipEvent_t event; - } hipEventQuery; - struct { - hipEvent_t* event; - } hipEventCreate; - struct { - hipDeviceptr_t* pbase; - size_t* psize; - hipDeviceptr_t dptr; - } hipMemGetAddressRange; - struct { - void* dst; - const void* symbolName; - size_t sizeBytes; - size_t offset; - hipMemcpyKind kind; - } hipMemcpyFromSymbol; - struct { - hipArray** pHandle; - const HIP_ARRAY_DESCRIPTOR* pAllocateArray; - } hipArrayCreate; - struct { - hipStream_t stream; - unsigned int* flags; - } hipStreamGetFlags; - struct { - hipArray** array; - const hipChannelFormatDesc* desc; - size_t width; - size_t height; - unsigned int flags; - } hipMallocArray; - struct { - hipSharedMemConfig* pConfig; - } hipCtxGetSharedMemConfig; - struct { - void* ptr; - size_t* size; - } hipMemPtrGetInfo; - struct { - 
unsigned int* flags; - } hipCtxGetFlags; - struct { - hipStream_t stream; - } hipStreamDestroy; - struct { - hipPitchedPtr pitchedDevPtr; - int value; - hipExtent extent; - hipStream_t stream; - } hipMemset3DAsync; - struct { - const hipMemcpy3DParms* p; - } hipMemcpy3D; - struct { - unsigned int flags; - } hipInit; - struct { - void* dst; - hipArray* srcArray; - size_t srcOffset; - size_t count; - } hipMemcpyAtoH; - struct { - void* dst; - size_t pitch; - int value; - size_t width; - size_t height; - } hipMemset2D; - struct { - void* dst; - size_t pitch; - int value; - size_t width; - size_t height; - hipStream_t stream; - } hipMemset2DAsync; - struct { - int* canAccessPeer; - int deviceId; - int peerDeviceId; - } hipDeviceCanAccessPeer; - struct { - int peerDeviceId; - unsigned int flags; - } hipDeviceEnablePeerAccess; - struct { - hipModule_t module; - } hipModuleUnload; - struct { - void* hostPtr; - } hipHostUnregister; - struct { - const void* func; - } hipLaunchByPtr; - struct { - hipStream_t stream; - } hipStreamSynchronize; - struct { - void* ptr; - } hipFreeHost; - struct { - uint32_t id; - } hipRemoveApiCallback; - struct { - hipFuncCache_t cacheConfig; - } hipDeviceSetCacheConfig; - struct { - hipCtx_t ctx; - int* apiVersion; - } hipCtxGetApiVersion; - struct { - hipDeviceptr_t dst; - void* src; - size_t sizeBytes; - } hipMemcpyHtoD; - struct { - hipDeviceptr_t* dptr; - size_t* bytes; - hipModule_t hmod; - const char* name; - } hipModuleGetGlobal; - struct { - hipArray* dstArray; - size_t dstOffset; - const void* srcHost; - size_t count; - } hipMemcpyHtoA; - struct { - hipCtx_t* ctx; - unsigned int flags; - hipDevice_t device; - } hipCtxCreate; - struct { - void* dst; - size_t dpitch; - const void* src; - size_t spitch; - size_t width; - size_t height; - hipMemcpyKind kind; - } hipMemcpy2D; - struct { - void* devPtr; - } hipIpcCloseMemHandle; - struct { - int* device; - const hipDeviceProp_t* prop; - } hipChooseDevice; - struct { - hipSharedMemConfig 
config; - } hipDeviceSetSharedMemConfig; - struct { - int* major; - int* minor; - hipDevice_t device; - } hipDeviceComputeCapability; - struct { - uint32_t id; - void* fun; - void* arg; - } hipRegisterApiCallback; - struct { - hipDevice_t* device; - int ordinal; - } hipDeviceGet; - struct { - hipFuncCache_t cacheConfig; - } hipCtxSetCacheConfig; - struct { - const void* func; - hipFuncCache_t config; - } hipFuncSetCacheConfig; - struct { - void* dst; - int dstDeviceId; - const void* src; - int srcDevice; - size_t sizeBytes; - hipStream_t stream; - } hipMemcpyPeerAsync; - struct { - float* ms; - hipEvent_t start; - hipEvent_t stop; - } hipEventElapsedTime; - struct { - hipDevice_t dev; - } hipDevicePrimaryCtxReset; - struct { - hipEvent_t event; - } hipEventDestroy; - struct { - hipCtx_t* ctx; - } hipCtxPopCurrent; - struct { - unsigned int* flagsPtr; - void* hostPtr; - } hipHostGetFlags; - struct { - void** ptr; - size_t size; - unsigned int flags; - } hipHostMalloc; - struct { - int* driverVersion; - } hipDriverGetVersion; - struct { - size_t* free; - size_t* total; - } hipMemGetInfo; - struct { - void* dst; - int value; - size_t sizeBytes; - } hipMemset; - struct { - hipDeviceptr_t dest; - unsigned char value; - size_t sizeBytes; - } hipMemsetD8; - struct { - void* hostPtr; - size_t sizeBytes; - unsigned int flags; - } hipHostRegister; - struct { - hipSharedMemConfig config; - } hipCtxSetSharedMemConfig; - struct { - hipArray** array; - const HIP_ARRAY_DESCRIPTOR* pAllocateArray; - } hipArray3DCreate; - struct { - void** devPtr; - hipIpcMemHandle_t handle; - unsigned int flags; - } hipIpcOpenMemHandle; - struct { - hipCtx_t ctx; - } hipCtxDestroy; - struct { - hipSharedMemConfig* pConfig; - } hipDeviceGetSharedMemConfig; - struct { - uint32_t id; - void* fun; - void* arg; - } hipRegisterActivityCallback; - struct { - unsigned flags; - } hipSetDeviceFlags; - struct { - void* ptr; - } hipFree; - struct { - int* pi; - hipDeviceAttribute_t attr; - int deviceId; - } 
hipDeviceGetAttribute; - struct { - void* dst; - hipDeviceptr_t src; - size_t sizeBytes; - } hipMemcpyDtoH; - struct { - hipCtx_t peerCtx; - } hipCtxDisablePeerAccess; - struct { - int* device; - const char* pciBusId; - } hipDeviceGetByPCIBusId; - struct { - hipIpcMemHandle_t* handle; - void* devPtr; - } hipIpcGetMemHandle; - struct { - hipDeviceptr_t dst; - void* src; - size_t sizeBytes; - hipStream_t stream; - } hipMemcpyHtoDAsync; - struct { - hipDevice_t* device; - } hipCtxGetDevice; - struct { - hipPitchedPtr pitchedDevPtr; - int value; - hipExtent extent; - } hipMemset3D; - struct { - hipModule_t* module; - const void* image; - } hipModuleLoadData; - struct { - size_t* bytes; - hipDevice_t device; - } hipDeviceTotalMem; - struct { - hipCtx_t ctx; - } hipCtxSetCurrent; - struct { - void** ptr; - size_t size; - } hipMallocHost; - struct { - hipCtx_t* pctx; - hipDevice_t dev; - } hipDevicePrimaryCtxRetain; - struct { - int peerDeviceId; - } hipDeviceDisablePeerAccess; - struct { - hipStream_t* stream; - unsigned int flags; - } hipStreamCreateWithFlags; - struct { - void* dst; - hipArray_const_t srcArray; - size_t wOffset; - size_t hOffset; - size_t count; - hipMemcpyKind kind; - } hipMemcpyFromArray; - struct { - void* dst; - size_t dpitch; - const void* src; - size_t spitch; - size_t width; - size_t height; - hipMemcpyKind kind; - hipStream_t stream; - } hipMemcpy2DAsync; - struct { - hipFuncAttributes* attr; - const void* func; - } hipFuncGetAttributes; - struct { - hipEvent_t* event; - unsigned flags; - } hipEventCreateWithFlags; - struct { - hipStream_t stream; - } hipStreamQuery; - struct { - char* pciBusId; - int len; - int device; - } hipDeviceGetPCIBusId; - struct { - void* dst; - const void* src; - size_t sizeBytes; - hipMemcpyKind kind; - } hipMemcpy; - struct { - void** ptr; - size_t size; - unsigned int flags; - } hipHostAlloc; - struct { - hipStream_t stream; - hipStreamCallback_t callback; - void* userData; - unsigned int flags; - } 
hipStreamAddCallback; - struct { - hipArray* dst; - size_t wOffset; - size_t hOffset; - const void* src; - size_t count; - hipMemcpyKind kind; - } hipMemcpyToArray; - struct { - hipFuncCache_t* cacheConfig; - } hipDeviceGetCacheConfig; - struct { - hipPitchedPtr* pitchedDevPtr; - hipExtent extent; - } hipMalloc3D; - struct { - hipPointerAttribute_t* attributes; - const void* ptr; - } hipPointerGetAttributes; - struct { - void* dst; - int value; - size_t sizeBytes; - hipStream_t stream; - } hipMemsetAsync; - struct { - const void* symbolName; - const void* src; - size_t sizeBytes; - size_t offset; - hipMemcpyKind kind; - } hipMemcpyToSymbol; - struct { - hipCtx_t ctx; - } hipCtxPushCurrent; - struct { - void* dst; - int dstDeviceId; - const void* src; - int srcDeviceId; - size_t sizeBytes; - } hipMemcpyPeer; - struct { - hipEvent_t event; - } hipEventSynchronize; - struct { - hipDeviceptr_t dst; - hipDeviceptr_t src; - size_t sizeBytes; - hipStream_t stream; - } hipMemcpyDtoDAsync; - struct { - hipCtx_t peerCtx; - unsigned int flags; - } hipCtxEnablePeerAccess; - struct { - void* dst; - hipDeviceptr_t src; - size_t sizeBytes; - hipStream_t stream; - } hipMemcpyDtoHAsync; - struct { - hipFunction_t f; - unsigned int gridDimX; - unsigned int gridDimY; - unsigned int gridDimZ; - unsigned int blockDimX; - unsigned int blockDimY; - unsigned int blockDimZ; - unsigned int sharedMemBytes; - hipStream_t stream; - void** kernelParams; - void** extra; - } hipModuleLaunchKernel; - struct { - hipFunction_t f; - } hipHccModuleLaunchKernel; - struct { - textureReference** texRef; - hipModule_t hmod; - const char* name; - } hipModuleGetTexRef; - struct { - uint32_t id; - } hipRemoveActivityCallback; - struct { - size_t* pValue; - hipLimit_t limit; - } hipDeviceGetLimit; - struct { - hipModule_t* module; - const void* image; - unsigned int numOptions; - hipJitOption* options; - void** optionValues; - } hipModuleLoadDataEx; - struct { - int* runtimeVersion; - } hipRuntimeGetVersion; 
- struct { - hipDeviceProp_t* prop; - int deviceId; - } hipGetDeviceProperties; - struct { - hipArray* array; - } hipFreeArray; - struct { - hipDevice_t dev; - } hipDevicePrimaryCtxRelease; - struct { - void** devPtr; - void* hstPtr; - unsigned int flags; - } hipHostGetDevicePointer; - struct { - const hip_Memcpy2D* pCopy; - } hipMemcpyParam2D; - struct { - dim3 gridDim; - dim3 blockDim; - size_t sharedMem; - hipStream_t stream; - } hipConfigureCall; - struct { - hipFunction_t* function; - hipModule_t module; - const char* kname; - } hipModuleGetFunction; - struct { - int* deviceId; - } hipGetDevice; - struct { - int* count; - } hipGetDeviceCount; - } args; -}; - -// HIP API callbacks args data filling macros -#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostFree.ptr = (void*)ptr; \ -}; -#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyToSymbolAsync.symbolName = (const void*)symbolName; \ - cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \ - cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)count; \ - cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \ - cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \ - cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMallocPitch.ptr = (void**)ptr; \ - cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \ - cb_data.args.hipMallocPitch.width = (size_t)width; \ - cb_data.args.hipMallocPitch.height = (size_t)height; \ -}; -#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMalloc.ptr = (void**)ptr; \ - cb_data.args.hipMalloc.size = (size_t)sizeBytes; \ -}; -#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetName.name = (char*)name; \ - cb_data.args.hipDeviceGetName.len = (int)len; \ - cb_data.args.hipDeviceGetName.device = (hipDevice_t)device; \ -}; -#define 
INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventRecord.event = (hipEvent_t)event; \ - cb_data.args.hipEventRecord.stream = (hipStream_t)stream; \ -}; -#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipSetDevice.deviceId = (int)deviceId; \ -}; -#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipSetupArgument.arg = (const void*)arg; \ - cb_data.args.hipSetupArgument.size = (size_t)size; \ - cb_data.args.hipSetupArgument.offset = (size_t)offset; \ -}; -#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \ - cb_data.args.hipMemcpyFromSymbolAsync.symbolName = (const void*)symbolName; \ - cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)count; \ - cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \ - cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \ - cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dst; \ - cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)src; \ - cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \ - cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \ - cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \ - cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \ - cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \ - cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \ - cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \ - cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ 
-}; -#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \ - cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \ - cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \ -}; -#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \ - cb_data.args.hipModuleLoad.fname = (const char*)fname; \ -}; -#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \ - cb_data.args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \ -}; -#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyAsync.dst = (void*)dst; \ - cb_data.args.hipMemcpyAsync.src = (const void*)src; \ - cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \ - cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMalloc3DArray.array = (hipArray**)array; \ - cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \ - cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \ - cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \ -}; -#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \ -}; -#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \ -}; -#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \ - cb_data.args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \ - cb_data.args.hipDevicePrimaryCtxGetState.active = (int*)active; \ -}; -#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventQuery.event = (hipEvent_t)event; \ -}; -#define 
INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventCreate.event = (hipEvent_t*)event; \ -}; -#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \ - cb_data.args.hipMemGetAddressRange.psize = (size_t*)psize; \ - cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \ -}; -#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \ - cb_data.args.hipMemcpyFromSymbol.symbolName = (const void*)symbolName; \ - cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)count; \ - cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \ - cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \ - cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ -}; -#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \ - cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \ -}; -#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMallocArray.array = (hipArray**)array; \ - cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \ - cb_data.args.hipMallocArray.width = (size_t)width; \ - cb_data.args.hipMallocArray.height = (size_t)height; \ - cb_data.args.hipMallocArray.flags = (unsigned int)flags; \ -}; -#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ -}; -#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \ - cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \ -}; -#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetFlags.flags = (unsigned int*)flags; \ -}; -#define 
INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ - cb_data.args.hipMemset3DAsync.value = (int)value; \ - cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \ - cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \ -}; -#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipInit.flags = (unsigned int)flags; \ -}; -#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyAtoH.dst = (void*)dst; \ - cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \ - cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \ - cb_data.args.hipMemcpyAtoH.count = (size_t)count; \ -}; -#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemset2D.dst = (void*)dst; \ - cb_data.args.hipMemset2D.pitch = (size_t)pitch; \ - cb_data.args.hipMemset2D.value = (int)value; \ - cb_data.args.hipMemset2D.width = (size_t)width; \ - cb_data.args.hipMemset2D.height = (size_t)height; \ -}; -#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemset2DAsync.dst = (void*)dst; \ - cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \ - cb_data.args.hipMemset2DAsync.value = (int)value; \ - cb_data.args.hipMemset2DAsync.width = (size_t)width; \ - cb_data.args.hipMemset2DAsync.height = (size_t)height; \ - cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccessPeer; \ - cb_data.args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \ - cb_data.args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \ -}; -#define 
INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \ - cb_data.args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \ -}; -#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \ -}; -#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \ -}; -#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipLaunchByPtr.func = (const void*)hostFunction; \ -}; -#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \ -}; -#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFreeHost.ptr = (void*)ptr; \ -}; -#define INIT_hipRemoveApiCallback_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipRemoveApiCallback.id = (uint32_t)id; \ -}; -#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ -}; -#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \ - cb_data.args.hipCtxGetApiVersion.apiVersion = (int*)apiVersion; \ -}; -#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyHtoD.dst = (hipDeviceptr_t)dst; \ - cb_data.args.hipMemcpyHtoD.src = (void*)src; \ - cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \ - cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \ - cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \ - cb_data.args.hipModuleGetGlobal.name = (const char*)name; \ -}; -#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \ - 
cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \ - cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \ - cb_data.args.hipMemcpyHtoA.count = (size_t)count; \ -}; -#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \ - cb_data.args.hipCtxCreate.flags = (unsigned int)flags; \ - cb_data.args.hipCtxCreate.device = (hipDevice_t)device; \ -}; -#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy2D.dst = (void*)dst; \ - cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \ - cb_data.args.hipMemcpy2D.src = (const void*)src; \ - cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \ - cb_data.args.hipMemcpy2D.width = (size_t)width; \ - cb_data.args.hipMemcpy2D.height = (size_t)height; \ - cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)devPtr; \ -}; -#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipChooseDevice.device = (int*)device; \ - cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)prop; \ -}; -#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \ -}; -#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceComputeCapability.major = (int*)major; \ - cb_data.args.hipDeviceComputeCapability.minor = (int*)minor; \ - cb_data.args.hipDeviceComputeCapability.device = (hipDevice_t)device; \ -}; -#define INIT_hipRegisterApiCallback_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipRegisterApiCallback.id = (uint32_t)id; \ - cb_data.args.hipRegisterApiCallback.fun = (void*)fun; \ - cb_data.args.hipRegisterApiCallback.arg = (void*)arg; \ -}; -#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGet.device = (hipDevice_t*)device; \ - cb_data.args.hipDeviceGet.ordinal = (int)deviceId; \ -}; 
-#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ -}; -#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \ - cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \ -}; -#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \ - cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \ - cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \ - cb_data.args.hipMemcpyPeerAsync.srcDevice = (int)srcDevice; \ - cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventElapsedTime.ms = (float*)ms; \ - cb_data.args.hipEventElapsedTime.start = (hipEvent_t)start; \ - cb_data.args.hipEventElapsedTime.stop = (hipEvent_t)stop; \ -}; -#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \ -}; -#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventDestroy.event = (hipEvent_t)event; \ -}; -#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \ -}; -#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \ - cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \ -}; -#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostMalloc.ptr = (void**)ptr; \ - cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \ - cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \ -}; -#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDriverGetVersion.driverVersion = 
(int*)driverVersion; \ -}; -#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemGetInfo.free = (size_t*)free; \ - cb_data.args.hipMemGetInfo.total = (size_t*)total; \ -}; -#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemset.dst = (void*)dst; \ - cb_data.args.hipMemset.value = (int)value; \ - cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \ - cb_data.args.hipMemsetD8.value = (unsigned char)value; \ - cb_data.args.hipMemsetD8.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \ - cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipHostRegister.flags = (unsigned int)flags; \ -}; -#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \ -}; -#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipArray3DCreate.array = (hipArray**)array; \ - cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ -}; -#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)devPtr; \ - cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \ - cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \ -}; -#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \ -}; -#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ -}; -#define INIT_hipRegisterActivityCallback_CB_ARGS_DATA(cb_data) { \ - 
cb_data.args.hipRegisterActivityCallback.id = (uint32_t)id; \ - cb_data.args.hipRegisterActivityCallback.fun = (void*)fun; \ - cb_data.args.hipRegisterActivityCallback.arg = (void*)arg; \ -}; -#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipSetDeviceFlags.flags = (unsigned)flags; \ -}; -#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFree.ptr = (void*)ptr; \ -}; -#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetAttribute.pi = (int*)pi; \ - cb_data.args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \ - cb_data.args.hipDeviceGetAttribute.deviceId = (int)device; \ -}; -#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyDtoH.dst = (void*)dst; \ - cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)src; \ - cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ -}; -#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetByPCIBusId.device = (int*)device; \ - cb_data.args.hipDeviceGetByPCIBusId.pciBusId = (const char*)pciBusId; \ -}; -#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \ - cb_data.args.hipIpcGetMemHandle.devPtr = (void*)devPtr; \ -}; -#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dst; \ - cb_data.args.hipMemcpyHtoDAsync.src = (void*)src; \ - cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxGetDevice.device = (hipDevice_t*)device; \ -}; -#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ - 
cb_data.args.hipMemset3D.value = (int)value; \ - cb_data.args.hipMemset3D.extent = (hipExtent)extent; \ -}; -#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \ - cb_data.args.hipModuleLoadData.image = (const void*)image; \ -}; -#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceTotalMem.bytes = (size_t*)bytes; \ - cb_data.args.hipDeviceTotalMem.device = (hipDevice_t)device; \ -}; -#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \ -}; -#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMallocHost.ptr = (void**)ptr; \ - cb_data.args.hipMallocHost.size = (size_t)sizeBytes; \ -}; -#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \ - cb_data.args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \ -}; -#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \ -}; -#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \ - cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \ -}; -#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \ - cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)srcArray; \ - cb_data.args.hipMemcpyFromArray.wOffset = (size_t)wOffset; \ - cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \ - cb_data.args.hipMemcpyFromArray.count = (size_t)count; \ - cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \ - cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \ - cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \ 
- cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \ - cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \ - cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \ - cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \ - cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \ - cb_data.args.hipFuncGetAttributes.func = (const void*)func; \ -}; -#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \ - cb_data.args.hipEventCreateWithFlags.flags = (unsigned)flags; \ -}; -#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \ -}; -#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \ - cb_data.args.hipDeviceGetPCIBusId.len = (int)len; \ - cb_data.args.hipDeviceGetPCIBusId.device = (int)device; \ -}; -#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy.dst = (void*)dst; \ - cb_data.args.hipMemcpy.src = (const void*)src; \ - cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostAlloc.ptr = (void**)ptr; \ - cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \ - cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \ -}; -#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \ - cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \ - cb_data.args.hipStreamAddCallback.userData = (void*)userData; \ - cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \ -}; -#define 
INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \ - cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \ - cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \ - cb_data.args.hipMemcpyToArray.src = (const void*)src; \ - cb_data.args.hipMemcpyToArray.count = (size_t)count; \ - cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \ -}; -#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ -}; -#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \ - cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \ -}; -#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \ - cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \ -}; -#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemsetAsync.dst = (void*)dst; \ - cb_data.args.hipMemsetAsync.value = (int)value; \ - cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyToSymbol.symbolName = (const void*)symbolName; \ - cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \ - cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)count; \ - cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \ - cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \ -}; -#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \ -}; -#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyPeer.dst = (void*)dst; \ - cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \ - 
cb_data.args.hipMemcpyPeer.src = (const void*)src; \ - cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \ - cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \ -}; -#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \ -}; -#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dst; \ - cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)src; \ - cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ - cb_data.args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \ -}; -#define INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dst; \ - cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)src; \ - cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)sizeBytes; \ - cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \ -}; -#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ - cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \ - cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \ - cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \ - cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ - cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ - cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ - cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ - cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \ - cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \ - cb_data.args.hipModuleLaunchKernel.extra = 
(void**)extra; \ -}; -#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ -}; -#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \ - cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \ - cb_data.args.hipModuleGetTexRef.name = (const char*)name; \ -}; -#define INIT_hipRemoveActivityCallback_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipRemoveActivityCallback.id = (uint32_t)id; \ -}; -#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDeviceGetLimit.pValue = (size_t*)pValue; \ - cb_data.args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \ -}; -#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \ - cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \ - cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \ - cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \ - cb_data.args.hipModuleLoadDataEx.optionValues = (void**)optionValues; \ -}; -#define INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \ -}; -#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipGetDeviceProperties.prop = (hipDeviceProp_t*)props; \ - cb_data.args.hipGetDeviceProperties.deviceId = (int)device; \ -}; -#define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFreeArray.array = (hipArray*)array; \ -}; -#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \ -}; -#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipHostGetDevicePointer.devPtr = (void**)devicePointer; \ - cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \ - cb_data.args.hipHostGetDevicePointer.flags = 
(unsigned int)flags; \ -}; -#define INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \ -}; -#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipConfigureCall.gridDim = (dim3)gridDim; \ - cb_data.args.hipConfigureCall.blockDim = (dim3)blockDim; \ - cb_data.args.hipConfigureCall.sharedMem = (size_t)sharedMem; \ - cb_data.args.hipConfigureCall.stream = (hipStream_t)stream; \ -}; -#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \ - cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \ - cb_data.args.hipModuleGetFunction.kname = (const char*)name; \ -}; -#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \ -}; -#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipGetDeviceCount.count = (int*)count; \ -}; -#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) - -#if 0 -// HIP API string method, method name and parameters -const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) { - std::ostringstream oss; - switch (id) { - case HIP_API_ID_hipHostFree: - oss << "hipHostFree(" - << " ptr=" << data->args.hipHostFree.ptr - << ")"; - break; - case HIP_API_ID_hipMemcpyToSymbolAsync: - oss << "hipMemcpyToSymbolAsync(" - << " symbolName=" << data->args.hipMemcpyToSymbolAsync.symbolName << "," - << " src=" << data->args.hipMemcpyToSymbolAsync.src << "," - << " sizeBytes=" << data->args.hipMemcpyToSymbolAsync.sizeBytes << "," - << " offset=" << data->args.hipMemcpyToSymbolAsync.offset << "," - << " kind=" << data->args.hipMemcpyToSymbolAsync.kind << "," - << " stream=" << data->args.hipMemcpyToSymbolAsync.stream - << ")"; - break; - case HIP_API_ID_hipMallocPitch: - oss << "hipMallocPitch(" - << " ptr=" << data->args.hipMallocPitch.ptr << "," - << " pitch=" << 
data->args.hipMallocPitch.pitch << "," - << " width=" << data->args.hipMallocPitch.width << "," - << " height=" << data->args.hipMallocPitch.height - << ")"; - break; - case HIP_API_ID_hipMalloc: - oss << "hipMalloc(" - << " ptr=" << data->args.hipMalloc.ptr << "," - << " size=" << data->args.hipMalloc.size - << ")"; - break; - case HIP_API_ID_hipDeviceGetName: - oss << "hipDeviceGetName(" - << " name=" << data->args.hipDeviceGetName.name << "," - << " len=" << data->args.hipDeviceGetName.len << "," - << " device=" << data->args.hipDeviceGetName.device - << ")"; - break; - case HIP_API_ID_hipEventRecord: - oss << "hipEventRecord(" - << " event=" << data->args.hipEventRecord.event << "," - << " stream=" << data->args.hipEventRecord.stream - << ")"; - break; - case HIP_API_ID_hipCtxSynchronize: - oss << "hipCtxSynchronize(" - << ")"; - break; - case HIP_API_ID_hipSetDevice: - oss << "hipSetDevice(" - << " deviceId=" << data->args.hipSetDevice.deviceId - << ")"; - break; - case HIP_API_ID_hipSetupArgument: - oss << "hipSetupArgument(" - << " arg=" << data->args.hipSetupArgument.arg << "," - << " size=" << data->args.hipSetupArgument.size << "," - << " offset=" << data->args.hipSetupArgument.offset - << ")"; - break; - case HIP_API_ID_hipMemcpyFromSymbolAsync: - oss << "hipMemcpyFromSymbolAsync(" - << " dst=" << data->args.hipMemcpyFromSymbolAsync.dst << "," - << " symbolName=" << data->args.hipMemcpyFromSymbolAsync.symbolName << "," - << " sizeBytes=" << data->args.hipMemcpyFromSymbolAsync.sizeBytes << "," - << " offset=" << data->args.hipMemcpyFromSymbolAsync.offset << "," - << " kind=" << data->args.hipMemcpyFromSymbolAsync.kind << "," - << " stream=" << data->args.hipMemcpyFromSymbolAsync.stream - << ")"; - break; - case HIP_API_ID_hipMemcpyDtoD: - oss << "hipMemcpyDtoD(" - << " dst=" << data->args.hipMemcpyDtoD.dst << "," - << " src=" << data->args.hipMemcpyDtoD.src << "," - << " sizeBytes=" << data->args.hipMemcpyDtoD.sizeBytes - << ")"; - break; - case 
HIP_API_ID_hipMemcpy2DToArray: - oss << "hipMemcpy2DToArray(" - << " dst=" << data->args.hipMemcpy2DToArray.dst << "," - << " wOffset=" << data->args.hipMemcpy2DToArray.wOffset << "," - << " hOffset=" << data->args.hipMemcpy2DToArray.hOffset << "," - << " src=" << data->args.hipMemcpy2DToArray.src << "," - << " spitch=" << data->args.hipMemcpy2DToArray.spitch << "," - << " width=" << data->args.hipMemcpy2DToArray.width << "," - << " height=" << data->args.hipMemcpy2DToArray.height << "," - << " kind=" << data->args.hipMemcpy2DToArray.kind - << ")"; - break; - case HIP_API_ID_hipCtxGetCacheConfig: - oss << "hipCtxGetCacheConfig(" - << " cacheConfig=" << data->args.hipCtxGetCacheConfig.cacheConfig - << ")"; - break; - case HIP_API_ID_hipStreamWaitEvent: - oss << "hipStreamWaitEvent(" - << " stream=" << data->args.hipStreamWaitEvent.stream << "," - << " event=" << data->args.hipStreamWaitEvent.event << "," - << " flags=" << data->args.hipStreamWaitEvent.flags - << ")"; - break; - case HIP_API_ID_hipModuleLoad: - oss << "hipModuleLoad(" - << " module=" << data->args.hipModuleLoad.module << "," - << " fname=" << data->args.hipModuleLoad.fname - << ")"; - break; - case HIP_API_ID_hipDevicePrimaryCtxSetFlags: - oss << "hipDevicePrimaryCtxSetFlags(" - << " dev=" << data->args.hipDevicePrimaryCtxSetFlags.dev << "," - << " flags=" << data->args.hipDevicePrimaryCtxSetFlags.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyAsync: - oss << "hipMemcpyAsync(" - << " dst=" << data->args.hipMemcpyAsync.dst << "," - << " src=" << data->args.hipMemcpyAsync.src << "," - << " sizeBytes=" << data->args.hipMemcpyAsync.sizeBytes << "," - << " kind=" << data->args.hipMemcpyAsync.kind << "," - << " stream=" << data->args.hipMemcpyAsync.stream - << ")"; - break; - case HIP_API_ID_hipMalloc3DArray: - oss << "hipMalloc3DArray(" - << " array=" << data->args.hipMalloc3DArray.array << "," - << " desc=" << data->args.hipMalloc3DArray.desc << "," - << " extent=" << 
data->args.hipMalloc3DArray.extent << "," - << " flags=" << data->args.hipMalloc3DArray.flags - << ")"; - break; - case HIP_API_ID_hipStreamCreate: - oss << "hipStreamCreate(" - << " stream=" << data->args.hipStreamCreate.stream - << ")"; - break; - case HIP_API_ID_hipCtxGetCurrent: - oss << "hipCtxGetCurrent(" - << " ctx=" << data->args.hipCtxGetCurrent.ctx - << ")"; - break; - case HIP_API_ID_hipDevicePrimaryCtxGetState: - oss << "hipDevicePrimaryCtxGetState(" - << " dev=" << data->args.hipDevicePrimaryCtxGetState.dev << "," - << " flags=" << data->args.hipDevicePrimaryCtxGetState.flags << "," - << " active=" << data->args.hipDevicePrimaryCtxGetState.active - << ")"; - break; - case HIP_API_ID_hipEventQuery: - oss << "hipEventQuery(" - << " event=" << data->args.hipEventQuery.event - << ")"; - break; - case HIP_API_ID_hipEventCreate: - oss << "hipEventCreate(" - << " event=" << data->args.hipEventCreate.event - << ")"; - break; - case HIP_API_ID_hipMemGetAddressRange: - oss << "hipMemGetAddressRange(" - << " pbase=" << data->args.hipMemGetAddressRange.pbase << "," - << " psize=" << data->args.hipMemGetAddressRange.psize << "," - << " dptr=" << data->args.hipMemGetAddressRange.dptr - << ")"; - break; - case HIP_API_ID_hipMemcpyFromSymbol: - oss << "hipMemcpyFromSymbol(" - << " dst=" << data->args.hipMemcpyFromSymbol.dst << "," - << " symbolName=" << data->args.hipMemcpyFromSymbol.symbolName << "," - << " sizeBytes=" << data->args.hipMemcpyFromSymbol.sizeBytes << "," - << " offset=" << data->args.hipMemcpyFromSymbol.offset << "," - << " kind=" << data->args.hipMemcpyFromSymbol.kind - << ")"; - break; - case HIP_API_ID_hipArrayCreate: - oss << "hipArrayCreate(" - << " pHandle=" << data->args.hipArrayCreate.pHandle << "," - << " pAllocateArray=" << data->args.hipArrayCreate.pAllocateArray - << ")"; - break; - case HIP_API_ID_hipStreamGetFlags: - oss << "hipStreamGetFlags(" - << " stream=" << data->args.hipStreamGetFlags.stream << "," - << " flags=" << 
data->args.hipStreamGetFlags.flags - << ")"; - break; - case HIP_API_ID_hipMallocArray: - oss << "hipMallocArray(" - << " array=" << data->args.hipMallocArray.array << "," - << " desc=" << data->args.hipMallocArray.desc << "," - << " width=" << data->args.hipMallocArray.width << "," - << " height=" << data->args.hipMallocArray.height << "," - << " flags=" << data->args.hipMallocArray.flags - << ")"; - break; - case HIP_API_ID_hipCtxGetSharedMemConfig: - oss << "hipCtxGetSharedMemConfig(" - << " pConfig=" << data->args.hipCtxGetSharedMemConfig.pConfig - << ")"; - break; - case HIP_API_ID_hipMemPtrGetInfo: - oss << "hipMemPtrGetInfo(" - << " ptr=" << data->args.hipMemPtrGetInfo.ptr << "," - << " size=" << data->args.hipMemPtrGetInfo.size - << ")"; - break; - case HIP_API_ID_hipCtxGetFlags: - oss << "hipCtxGetFlags(" - << " flags=" << data->args.hipCtxGetFlags.flags - << ")"; - break; - case HIP_API_ID_hipStreamDestroy: - oss << "hipStreamDestroy(" - << " stream=" << data->args.hipStreamDestroy.stream - << ")"; - break; - case HIP_API_ID_hipMemset3DAsync: - oss << "hipMemset3DAsync(" - << " pitchedDevPtr=" << data->args.hipMemset3DAsync.pitchedDevPtr << "," - << " value=" << data->args.hipMemset3DAsync.value << "," - << " extent=" << data->args.hipMemset3DAsync.extent << "," - << " stream=" << data->args.hipMemset3DAsync.stream - << ")"; - break; - case HIP_API_ID_hipMemcpy3D: - oss << "hipMemcpy3D(" - << " p=" << data->args.hipMemcpy3D.p - << ")"; - break; - case HIP_API_ID_hipInit: - oss << "hipInit(" - << " flags=" << data->args.hipInit.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyAtoH: - oss << "hipMemcpyAtoH(" - << " dst=" << data->args.hipMemcpyAtoH.dst << "," - << " srcArray=" << data->args.hipMemcpyAtoH.srcArray << "," - << " srcOffset=" << data->args.hipMemcpyAtoH.srcOffset << "," - << " count=" << data->args.hipMemcpyAtoH.count - << ")"; - break; - case HIP_API_ID_hipMemset2D: - oss << "hipMemset2D(" - << " dst=" << data->args.hipMemset2D.dst << "," - 
<< " pitch=" << data->args.hipMemset2D.pitch << "," - << " value=" << data->args.hipMemset2D.value << "," - << " width=" << data->args.hipMemset2D.width << "," - << " height=" << data->args.hipMemset2D.height - << ")"; - break; - case HIP_API_ID_hipMemset2DAsync: - oss << "hipMemset2DAsync(" - << " dst=" << data->args.hipMemset2DAsync.dst << "," - << " pitch=" << data->args.hipMemset2DAsync.pitch << "," - << " value=" << data->args.hipMemset2DAsync.value << "," - << " width=" << data->args.hipMemset2DAsync.width << "," - << " height=" << data->args.hipMemset2DAsync.height << "," - << " stream=" << data->args.hipMemset2DAsync.stream - << ")"; - break; - case HIP_API_ID_hipDeviceCanAccessPeer: - oss << "hipDeviceCanAccessPeer(" - << " canAccessPeer=" << data->args.hipDeviceCanAccessPeer.canAccessPeer << "," - << " deviceId=" << data->args.hipDeviceCanAccessPeer.deviceId << "," - << " peerDeviceId=" << data->args.hipDeviceCanAccessPeer.peerDeviceId - << ")"; - break; - case HIP_API_ID_hipDeviceEnablePeerAccess: - oss << "hipDeviceEnablePeerAccess(" - << " peerDeviceId=" << data->args.hipDeviceEnablePeerAccess.peerDeviceId << "," - << " flags=" << data->args.hipDeviceEnablePeerAccess.flags - << ")"; - break; - case HIP_API_ID_hipModuleUnload: - oss << "hipModuleUnload(" - << " module=" << data->args.hipModuleUnload.module - << ")"; - break; - case HIP_API_ID_hipHostUnregister: - oss << "hipHostUnregister(" - << " hostPtr=" << data->args.hipHostUnregister.hostPtr - << ")"; - break; - case HIP_API_ID_hipProfilerStop: - oss << "hipProfilerStop(" - << ")"; - break; - case HIP_API_ID_hipLaunchByPtr: - oss << "hipLaunchByPtr(" - << " func=" << data->args.hipLaunchByPtr.func - << ")"; - break; - case HIP_API_ID_hipStreamSynchronize: - oss << "hipStreamSynchronize(" - << " stream=" << data->args.hipStreamSynchronize.stream - << ")"; - break; - case HIP_API_ID_hipFreeHost: - oss << "hipFreeHost(" - << " ptr=" << data->args.hipFreeHost.ptr - << ")"; - break; - case 
HIP_API_ID_hipRemoveApiCallback: - oss << "hipRemoveApiCallback(" - << " id=" << data->args.hipRemoveApiCallback.id - << ")"; - break; - case HIP_API_ID_hipDeviceSetCacheConfig: - oss << "hipDeviceSetCacheConfig(" - << " cacheConfig=" << data->args.hipDeviceSetCacheConfig.cacheConfig - << ")"; - break; - case HIP_API_ID_hipCtxGetApiVersion: - oss << "hipCtxGetApiVersion(" - << " ctx=" << data->args.hipCtxGetApiVersion.ctx << "," - << " apiVersion=" << data->args.hipCtxGetApiVersion.apiVersion - << ")"; - break; - case HIP_API_ID_hipMemcpyHtoD: - oss << "hipMemcpyHtoD(" - << " dst=" << data->args.hipMemcpyHtoD.dst << "," - << " src=" << data->args.hipMemcpyHtoD.src << "," - << " sizeBytes=" << data->args.hipMemcpyHtoD.sizeBytes - << ")"; - break; - case HIP_API_ID_hipModuleGetGlobal: - oss << "hipModuleGetGlobal(" - << " dptr=" << data->args.hipModuleGetGlobal.dptr << "," - << " bytes=" << data->args.hipModuleGetGlobal.bytes << "," - << " hmod=" << data->args.hipModuleGetGlobal.hmod << "," - << " name=" << data->args.hipModuleGetGlobal.name - << ")"; - break; - case HIP_API_ID_hipMemcpyHtoA: - oss << "hipMemcpyHtoA(" - << " dstArray=" << data->args.hipMemcpyHtoA.dstArray << "," - << " dstOffset=" << data->args.hipMemcpyHtoA.dstOffset << "," - << " srcHost=" << data->args.hipMemcpyHtoA.srcHost << "," - << " count=" << data->args.hipMemcpyHtoA.count - << ")"; - break; - case HIP_API_ID_hipCtxCreate: - oss << "hipCtxCreate(" - << " ctx=" << data->args.hipCtxCreate.ctx << "," - << " flags=" << data->args.hipCtxCreate.flags << "," - << " device=" << data->args.hipCtxCreate.device - << ")"; - break; - case HIP_API_ID_hipMemcpy2D: - oss << "hipMemcpy2D(" - << " dst=" << data->args.hipMemcpy2D.dst << "," - << " dpitch=" << data->args.hipMemcpy2D.dpitch << "," - << " src=" << data->args.hipMemcpy2D.src << "," - << " spitch=" << data->args.hipMemcpy2D.spitch << "," - << " width=" << data->args.hipMemcpy2D.width << "," - << " height=" << data->args.hipMemcpy2D.height << "," - 
<< " kind=" << data->args.hipMemcpy2D.kind - << ")"; - break; - case HIP_API_ID_hipIpcCloseMemHandle: - oss << "hipIpcCloseMemHandle(" - << " devPtr=" << data->args.hipIpcCloseMemHandle.devPtr - << ")"; - break; - case HIP_API_ID_hipChooseDevice: - oss << "hipChooseDevice(" - << " device=" << data->args.hipChooseDevice.device << "," - << " prop=" << data->args.hipChooseDevice.prop - << ")"; - break; - case HIP_API_ID_hipDeviceSetSharedMemConfig: - oss << "hipDeviceSetSharedMemConfig(" - << " config=" << data->args.hipDeviceSetSharedMemConfig.config - << ")"; - break; - case HIP_API_ID_hipDeviceComputeCapability: - oss << "hipDeviceComputeCapability(" - << " major=" << data->args.hipDeviceComputeCapability.major << "," - << " minor=" << data->args.hipDeviceComputeCapability.minor << "," - << " device=" << data->args.hipDeviceComputeCapability.device - << ")"; - break; - case HIP_API_ID_hipRegisterApiCallback: - oss << "hipRegisterApiCallback(" - << " id=" << data->args.hipRegisterApiCallback.id << "," - << " fun=" << data->args.hipRegisterApiCallback.fun << "," - << " arg=" << data->args.hipRegisterApiCallback.arg - << ")"; - break; - case HIP_API_ID_hipDeviceGet: - oss << "hipDeviceGet(" - << " device=" << data->args.hipDeviceGet.device << "," - << " ordinal=" << data->args.hipDeviceGet.ordinal - << ")"; - break; - case HIP_API_ID_hipProfilerStart: - oss << "hipProfilerStart(" - << ")"; - break; - case HIP_API_ID_hipCtxSetCacheConfig: - oss << "hipCtxSetCacheConfig(" - << " cacheConfig=" << data->args.hipCtxSetCacheConfig.cacheConfig - << ")"; - break; - case HIP_API_ID_hipFuncSetCacheConfig: - oss << "hipFuncSetCacheConfig(" - << " func=" << data->args.hipFuncSetCacheConfig.func << "," - << " config=" << data->args.hipFuncSetCacheConfig.config - << ")"; - break; - case HIP_API_ID_hipMemcpyPeerAsync: - oss << "hipMemcpyPeerAsync(" - << " dst=" << data->args.hipMemcpyPeerAsync.dst << "," - << " dstDeviceId=" << data->args.hipMemcpyPeerAsync.dstDeviceId << "," - << " 
src=" << data->args.hipMemcpyPeerAsync.src << "," - << " srcDevice=" << data->args.hipMemcpyPeerAsync.srcDevice << "," - << " sizeBytes=" << data->args.hipMemcpyPeerAsync.sizeBytes << "," - << " stream=" << data->args.hipMemcpyPeerAsync.stream - << ")"; - break; - case HIP_API_ID_hipEventElapsedTime: - oss << "hipEventElapsedTime(" - << " ms=" << data->args.hipEventElapsedTime.ms << "," - << " start=" << data->args.hipEventElapsedTime.start << "," - << " stop=" << data->args.hipEventElapsedTime.stop - << ")"; - break; - case HIP_API_ID_hipDevicePrimaryCtxReset: - oss << "hipDevicePrimaryCtxReset(" - << " dev=" << data->args.hipDevicePrimaryCtxReset.dev - << ")"; - break; - case HIP_API_ID_hipEventDestroy: - oss << "hipEventDestroy(" - << " event=" << data->args.hipEventDestroy.event - << ")"; - break; - case HIP_API_ID_hipCtxPopCurrent: - oss << "hipCtxPopCurrent(" - << " ctx=" << data->args.hipCtxPopCurrent.ctx - << ")"; - break; - case HIP_API_ID_hipHostGetFlags: - oss << "hipHostGetFlags(" - << " flagsPtr=" << data->args.hipHostGetFlags.flagsPtr << "," - << " hostPtr=" << data->args.hipHostGetFlags.hostPtr - << ")"; - break; - case HIP_API_ID_hipHostMalloc: - oss << "hipHostMalloc(" - << " ptr=" << data->args.hipHostMalloc.ptr << "," - << " size=" << data->args.hipHostMalloc.size << "," - << " flags=" << data->args.hipHostMalloc.flags - << ")"; - break; - case HIP_API_ID_hipDriverGetVersion: - oss << "hipDriverGetVersion(" - << " driverVersion=" << data->args.hipDriverGetVersion.driverVersion - << ")"; - break; - case HIP_API_ID_hipMemGetInfo: - oss << "hipMemGetInfo(" - << " free=" << data->args.hipMemGetInfo.free << "," - << " total=" << data->args.hipMemGetInfo.total - << ")"; - break; - case HIP_API_ID_hipDeviceReset: - oss << "hipDeviceReset(" - << ")"; - break; - case HIP_API_ID_hipMemset: - oss << "hipMemset(" - << " dst=" << data->args.hipMemset.dst << "," - << " value=" << data->args.hipMemset.value << "," - << " sizeBytes=" << 
data->args.hipMemset.sizeBytes - << ")"; - break; - case HIP_API_ID_hipMemsetD8: - oss << "hipMemsetD8(" - << " dest=" << data->args.hipMemsetD8.dest << "," - << " value=" << data->args.hipMemsetD8.value << "," - << " sizeBytes=" << data->args.hipMemsetD8.sizeBytes - << ")"; - break; - case HIP_API_ID_hipHostRegister: - oss << "hipHostRegister(" - << " hostPtr=" << data->args.hipHostRegister.hostPtr << "," - << " sizeBytes=" << data->args.hipHostRegister.sizeBytes << "," - << " flags=" << data->args.hipHostRegister.flags - << ")"; - break; - case HIP_API_ID_hipCtxSetSharedMemConfig: - oss << "hipCtxSetSharedMemConfig(" - << " config=" << data->args.hipCtxSetSharedMemConfig.config - << ")"; - break; - case HIP_API_ID_hipArray3DCreate: - oss << "hipArray3DCreate(" - << " array=" << data->args.hipArray3DCreate.array << "," - << " pAllocateArray=" << data->args.hipArray3DCreate.pAllocateArray - << ")"; - break; - case HIP_API_ID_hipIpcOpenMemHandle: - oss << "hipIpcOpenMemHandle(" - << " devPtr=" << data->args.hipIpcOpenMemHandle.devPtr << "," - << " handle=" << data->args.hipIpcOpenMemHandle.handle << "," - << " flags=" << data->args.hipIpcOpenMemHandle.flags - << ")"; - break; - case HIP_API_ID_hipGetLastError: - oss << "hipGetLastError(" - << ")"; - break; - case HIP_API_ID_hipCtxDestroy: - oss << "hipCtxDestroy(" - << " ctx=" << data->args.hipCtxDestroy.ctx - << ")"; - break; - case HIP_API_ID_hipDeviceGetSharedMemConfig: - oss << "hipDeviceGetSharedMemConfig(" - << " pConfig=" << data->args.hipDeviceGetSharedMemConfig.pConfig - << ")"; - break; - case HIP_API_ID_hipRegisterActivityCallback: - oss << "hipRegisterActivityCallback(" - << " id=" << data->args.hipRegisterActivityCallback.id << "," - << " fun=" << data->args.hipRegisterActivityCallback.fun << "," - << " arg=" << data->args.hipRegisterActivityCallback.arg - << ")"; - break; - case HIP_API_ID_hipSetDeviceFlags: - oss << "hipSetDeviceFlags(" - << " flags=" << data->args.hipSetDeviceFlags.flags - << ")"; - 
break; - case HIP_API_ID_hipFree: - oss << "hipFree(" - << " ptr=" << data->args.hipFree.ptr - << ")"; - break; - case HIP_API_ID_hipDeviceGetAttribute: - oss << "hipDeviceGetAttribute(" - << " pi=" << data->args.hipDeviceGetAttribute.pi << "," - << " attr=" << data->args.hipDeviceGetAttribute.attr << "," - << " deviceId=" << data->args.hipDeviceGetAttribute.deviceId - << ")"; - break; - case HIP_API_ID_hipMemcpyDtoH: - oss << "hipMemcpyDtoH(" - << " dst=" << data->args.hipMemcpyDtoH.dst << "," - << " src=" << data->args.hipMemcpyDtoH.src << "," - << " sizeBytes=" << data->args.hipMemcpyDtoH.sizeBytes - << ")"; - break; - case HIP_API_ID_hipCtxDisablePeerAccess: - oss << "hipCtxDisablePeerAccess(" - << " peerCtx=" << data->args.hipCtxDisablePeerAccess.peerCtx - << ")"; - break; - case HIP_API_ID_hipDeviceGetByPCIBusId: - oss << "hipDeviceGetByPCIBusId(" - << " device=" << data->args.hipDeviceGetByPCIBusId.device << "," - << " pciBusId=" << data->args.hipDeviceGetByPCIBusId.pciBusId - << ")"; - break; - case HIP_API_ID_hipIpcGetMemHandle: - oss << "hipIpcGetMemHandle(" - << " handle=" << data->args.hipIpcGetMemHandle.handle << "," - << " devPtr=" << data->args.hipIpcGetMemHandle.devPtr - << ")"; - break; - case HIP_API_ID_hipMemcpyHtoDAsync: - oss << "hipMemcpyHtoDAsync(" - << " dst=" << data->args.hipMemcpyHtoDAsync.dst << "," - << " src=" << data->args.hipMemcpyHtoDAsync.src << "," - << " sizeBytes=" << data->args.hipMemcpyHtoDAsync.sizeBytes << "," - << " stream=" << data->args.hipMemcpyHtoDAsync.stream - << ")"; - break; - case HIP_API_ID_hipCtxGetDevice: - oss << "hipCtxGetDevice(" - << " device=" << data->args.hipCtxGetDevice.device - << ")"; - break; - case HIP_API_ID_hipMemset3D: - oss << "hipMemset3D(" - << " pitchedDevPtr=" << data->args.hipMemset3D.pitchedDevPtr << "," - << " value=" << data->args.hipMemset3D.value << "," - << " extent=" << data->args.hipMemset3D.extent - << ")"; - break; - case HIP_API_ID_hipModuleLoadData: - oss << "hipModuleLoadData(" 
- << " module=" << data->args.hipModuleLoadData.module << "," - << " image=" << data->args.hipModuleLoadData.image - << ")"; - break; - case HIP_API_ID_hipDeviceTotalMem: - oss << "hipDeviceTotalMem(" - << " bytes=" << data->args.hipDeviceTotalMem.bytes << "," - << " device=" << data->args.hipDeviceTotalMem.device - << ")"; - break; - case HIP_API_ID_hipCtxSetCurrent: - oss << "hipCtxSetCurrent(" - << " ctx=" << data->args.hipCtxSetCurrent.ctx - << ")"; - break; - case HIP_API_ID_hipMallocHost: - oss << "hipMallocHost(" - << " ptr=" << data->args.hipMallocHost.ptr << "," - << " size=" << data->args.hipMallocHost.size - << ")"; - break; - case HIP_API_ID_hipDevicePrimaryCtxRetain: - oss << "hipDevicePrimaryCtxRetain(" - << " pctx=" << data->args.hipDevicePrimaryCtxRetain.pctx << "," - << " dev=" << data->args.hipDevicePrimaryCtxRetain.dev - << ")"; - break; - case HIP_API_ID_hipDeviceDisablePeerAccess: - oss << "hipDeviceDisablePeerAccess(" - << " peerDeviceId=" << data->args.hipDeviceDisablePeerAccess.peerDeviceId - << ")"; - break; - case HIP_API_ID_hipStreamCreateWithFlags: - oss << "hipStreamCreateWithFlags(" - << " stream=" << data->args.hipStreamCreateWithFlags.stream << "," - << " flags=" << data->args.hipStreamCreateWithFlags.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyFromArray: - oss << "hipMemcpyFromArray(" - << " dst=" << data->args.hipMemcpyFromArray.dst << "," - << " srcArray=" << data->args.hipMemcpyFromArray.srcArray << "," - << " wOffset=" << data->args.hipMemcpyFromArray.wOffset << "," - << " hOffset=" << data->args.hipMemcpyFromArray.hOffset << "," - << " count=" << data->args.hipMemcpyFromArray.count << "," - << " kind=" << data->args.hipMemcpyFromArray.kind - << ")"; - break; - case HIP_API_ID_hipMemcpy2DAsync: - oss << "hipMemcpy2DAsync(" - << " dst=" << data->args.hipMemcpy2DAsync.dst << "," - << " dpitch=" << data->args.hipMemcpy2DAsync.dpitch << "," - << " src=" << data->args.hipMemcpy2DAsync.src << "," - << " spitch=" << 
data->args.hipMemcpy2DAsync.spitch << "," - << " width=" << data->args.hipMemcpy2DAsync.width << "," - << " height=" << data->args.hipMemcpy2DAsync.height << "," - << " kind=" << data->args.hipMemcpy2DAsync.kind << "," - << " stream=" << data->args.hipMemcpy2DAsync.stream - << ")"; - break; - case HIP_API_ID_hipFuncGetAttributes: - oss << "hipFuncGetAttributes(" - << " attr=" << data->args.hipFuncGetAttributes.attr << "," - << " func=" << data->args.hipFuncGetAttributes.func - << ")"; - break; - case HIP_API_ID_hipEventCreateWithFlags: - oss << "hipEventCreateWithFlags(" - << " event=" << data->args.hipEventCreateWithFlags.event << "," - << " flags=" << data->args.hipEventCreateWithFlags.flags - << ")"; - break; - case HIP_API_ID_hipStreamQuery: - oss << "hipStreamQuery(" - << " stream=" << data->args.hipStreamQuery.stream - << ")"; - break; - case HIP_API_ID_hipDeviceGetPCIBusId: - oss << "hipDeviceGetPCIBusId(" - << " pciBusId=" << data->args.hipDeviceGetPCIBusId.pciBusId << "," - << " len=" << data->args.hipDeviceGetPCIBusId.len << "," - << " device=" << data->args.hipDeviceGetPCIBusId.device - << ")"; - break; - case HIP_API_ID_hipMemcpy: - oss << "hipMemcpy(" - << " dst=" << data->args.hipMemcpy.dst << "," - << " src=" << data->args.hipMemcpy.src << "," - << " sizeBytes=" << data->args.hipMemcpy.sizeBytes << "," - << " kind=" << data->args.hipMemcpy.kind - << ")"; - break; - case HIP_API_ID_hipPeekAtLastError: - oss << "hipPeekAtLastError(" - << ")"; - break; - case HIP_API_ID_hipHostAlloc: - oss << "hipHostAlloc(" - << " ptr=" << data->args.hipHostAlloc.ptr << "," - << " size=" << data->args.hipHostAlloc.size << "," - << " flags=" << data->args.hipHostAlloc.flags - << ")"; - break; - case HIP_API_ID_hipStreamAddCallback: - oss << "hipStreamAddCallback(" - << " stream=" << data->args.hipStreamAddCallback.stream << "," - << " callback=" << data->args.hipStreamAddCallback.callback << "," - << " userData=" << data->args.hipStreamAddCallback.userData << "," - << " 
flags=" << data->args.hipStreamAddCallback.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyToArray: - oss << "hipMemcpyToArray(" - << " dst=" << data->args.hipMemcpyToArray.dst << "," - << " wOffset=" << data->args.hipMemcpyToArray.wOffset << "," - << " hOffset=" << data->args.hipMemcpyToArray.hOffset << "," - << " src=" << data->args.hipMemcpyToArray.src << "," - << " count=" << data->args.hipMemcpyToArray.count << "," - << " kind=" << data->args.hipMemcpyToArray.kind - << ")"; - break; - case HIP_API_ID_hipDeviceSynchronize: - oss << "hipDeviceSynchronize(" - << ")"; - break; - case HIP_API_ID_hipDeviceGetCacheConfig: - oss << "hipDeviceGetCacheConfig(" - << " cacheConfig=" << data->args.hipDeviceGetCacheConfig.cacheConfig - << ")"; - break; - case HIP_API_ID_hipMalloc3D: - oss << "hipMalloc3D(" - << " pitchedDevPtr=" << data->args.hipMalloc3D.pitchedDevPtr << "," - << " extent=" << data->args.hipMalloc3D.extent - << ")"; - break; - case HIP_API_ID_hipPointerGetAttributes: - oss << "hipPointerGetAttributes(" - << " attributes=" << data->args.hipPointerGetAttributes.attributes << "," - << " ptr=" << data->args.hipPointerGetAttributes.ptr - << ")"; - break; - case HIP_API_ID_hipMemsetAsync: - oss << "hipMemsetAsync(" - << " dst=" << data->args.hipMemsetAsync.dst << "," - << " value=" << data->args.hipMemsetAsync.value << "," - << " sizeBytes=" << data->args.hipMemsetAsync.sizeBytes << "," - << " stream=" << data->args.hipMemsetAsync.stream - << ")"; - break; - case HIP_API_ID_hipMemcpyToSymbol: - oss << "hipMemcpyToSymbol(" - << " symbolName=" << data->args.hipMemcpyToSymbol.symbolName << "," - << " src=" << data->args.hipMemcpyToSymbol.src << "," - << " sizeBytes=" << data->args.hipMemcpyToSymbol.sizeBytes << "," - << " offset=" << data->args.hipMemcpyToSymbol.offset << "," - << " kind=" << data->args.hipMemcpyToSymbol.kind - << ")"; - break; - case HIP_API_ID_hipCtxPushCurrent: - oss << "hipCtxPushCurrent(" - << " ctx=" << data->args.hipCtxPushCurrent.ctx - 
<< ")"; - break; - case HIP_API_ID_hipMemcpyPeer: - oss << "hipMemcpyPeer(" - << " dst=" << data->args.hipMemcpyPeer.dst << "," - << " dstDeviceId=" << data->args.hipMemcpyPeer.dstDeviceId << "," - << " src=" << data->args.hipMemcpyPeer.src << "," - << " srcDeviceId=" << data->args.hipMemcpyPeer.srcDeviceId << "," - << " sizeBytes=" << data->args.hipMemcpyPeer.sizeBytes - << ")"; - break; - case HIP_API_ID_hipEventSynchronize: - oss << "hipEventSynchronize(" - << " event=" << data->args.hipEventSynchronize.event - << ")"; - break; - case HIP_API_ID_hipMemcpyDtoDAsync: - oss << "hipMemcpyDtoDAsync(" - << " dst=" << data->args.hipMemcpyDtoDAsync.dst << "," - << " src=" << data->args.hipMemcpyDtoDAsync.src << "," - << " sizeBytes=" << data->args.hipMemcpyDtoDAsync.sizeBytes << "," - << " stream=" << data->args.hipMemcpyDtoDAsync.stream - << ")"; - break; - case HIP_API_ID_hipCtxEnablePeerAccess: - oss << "hipCtxEnablePeerAccess(" - << " peerCtx=" << data->args.hipCtxEnablePeerAccess.peerCtx << "," - << " flags=" << data->args.hipCtxEnablePeerAccess.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyDtoHAsync: - oss << "hipMemcpyDtoHAsync(" - << " dst=" << data->args.hipMemcpyDtoHAsync.dst << "," - << " src=" << data->args.hipMemcpyDtoHAsync.src << "," - << " sizeBytes=" << data->args.hipMemcpyDtoHAsync.sizeBytes << "," - << " stream=" << data->args.hipMemcpyDtoHAsync.stream - << ")"; - break; - case HIP_API_ID_hipModuleLaunchKernel: - oss << "hipModuleLaunchKernel(" - << " f=" << data->args.hipModuleLaunchKernel.f << "," - << " gridDimX=" << data->args.hipModuleLaunchKernel.gridDimX << "," - << " gridDimY=" << data->args.hipModuleLaunchKernel.gridDimY << "," - << " gridDimZ=" << data->args.hipModuleLaunchKernel.gridDimZ << "," - << " blockDimX=" << data->args.hipModuleLaunchKernel.blockDimX << "," - << " blockDimY=" << data->args.hipModuleLaunchKernel.blockDimY << "," - << " blockDimZ=" << data->args.hipModuleLaunchKernel.blockDimZ << "," - << " sharedMemBytes=" << 
data->args.hipModuleLaunchKernel.sharedMemBytes << "," - << " stream=" << data->args.hipModuleLaunchKernel.stream << "," - << " kernelParams=" << data->args.hipModuleLaunchKernel.kernelParams << "," - << " extra=" << data->args.hipModuleLaunchKernel.extra - << ")"; - break; - case HIP_API_ID_hipHccModuleLaunchKernel: - oss << "hipHccModuleLaunchKernel(" - << " f=" << data->args.hipHccModuleLaunchKernel.f << "," - << ")"; - break; - case HIP_API_ID_hipModuleGetTexRef: - oss << "hipModuleGetTexRef(" - << " texRef=" << data->args.hipModuleGetTexRef.texRef << "," - << " hmod=" << data->args.hipModuleGetTexRef.hmod << "," - << " name=" << data->args.hipModuleGetTexRef.name - << ")"; - break; - case HIP_API_ID_hipRemoveActivityCallback: - oss << "hipRemoveActivityCallback(" - << " id=" << data->args.hipRemoveActivityCallback.id - << ")"; - break; - case HIP_API_ID_hipDeviceGetLimit: - oss << "hipDeviceGetLimit(" - << " pValue=" << data->args.hipDeviceGetLimit.pValue << "," - << " limit=" << data->args.hipDeviceGetLimit.limit - << ")"; - break; - case HIP_API_ID_hipModuleLoadDataEx: - oss << "hipModuleLoadDataEx(" - << " module=" << data->args.hipModuleLoadDataEx.module << "," - << " image=" << data->args.hipModuleLoadDataEx.image << "," - << " numOptions=" << data->args.hipModuleLoadDataEx.numOptions << "," - << " options=" << data->args.hipModuleLoadDataEx.options << "," - << " optionValues=" << data->args.hipModuleLoadDataEx.optionValues - << ")"; - break; - case HIP_API_ID_hipRuntimeGetVersion: - oss << "hipRuntimeGetVersion(" - << " runtimeVersion=" << data->args.hipRuntimeGetVersion.runtimeVersion - << ")"; - break; - case HIP_API_ID_hipGetDeviceProperties: - oss << "hipGetDeviceProperties(" - << " prop=" << data->args.hipGetDeviceProperties.prop << "," - << " deviceId=" << data->args.hipGetDeviceProperties.deviceId - << ")"; - break; - case HIP_API_ID_hipFreeArray: - oss << "hipFreeArray(" - << " array=" << data->args.hipFreeArray.array - << ")"; - break; - case 
HIP_API_ID_hipDevicePrimaryCtxRelease: - oss << "hipDevicePrimaryCtxRelease(" - << " dev=" << data->args.hipDevicePrimaryCtxRelease.dev - << ")"; - break; - case HIP_API_ID_hipHostGetDevicePointer: - oss << "hipHostGetDevicePointer(" - << " devPtr=" << data->args.hipHostGetDevicePointer.devPtr << "," - << " hstPtr=" << data->args.hipHostGetDevicePointer.hstPtr << "," - << " flags=" << data->args.hipHostGetDevicePointer.flags - << ")"; - break; - case HIP_API_ID_hipMemcpyParam2D: - oss << "hipMemcpyParam2D(" - << " pCopy=" << data->args.hipMemcpyParam2D.pCopy - << ")"; - break; - case HIP_API_ID_hipConfigureCall: - oss << "hipConfigureCall(" - << " gridDim=" << data->args.hipConfigureCall.gridDim << "," - << " blockDim=" << data->args.hipConfigureCall.blockDim << "," - << " sharedMem=" << data->args.hipConfigureCall.sharedMem << "," - << " stream=" << data->args.hipConfigureCall.stream - << ")"; - break; - case HIP_API_ID_hipModuleGetFunction: - oss << "hipModuleGetFunction(" - << " function=" << data->args.hipModuleGetFunction.function << "," - << " module=" << data->args.hipModuleGetFunction.module << "," - << " kname=" << data->args.hipModuleGetFunction.kname - << ")"; - break; - case HIP_API_ID_hipGetDevice: - oss << "hipGetDevice(" - << " deviceId=" << data->args.hipGetDevice.deviceId - << ")"; - break; - case HIP_API_ID_hipGetDeviceCount: - oss << "hipGetDeviceCount(" - << " count=" << data->args.hipGetDeviceCount.count - << ")"; - break; - default: oss << "unknown"; - }; - return strdup(oss.str().c_str()); -}; -#endif - -#endif // _HIP_CBSTR diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h b/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h deleted file mode 100644 index 60d145c88..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_runtime.h +++ /dev/null @@ -1,481 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/hip_runtime.h - * @brief Contains definitions of APIs for HIP runtime. 
- */ - -//#pragma once -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H - -#if defined(__HCC__) -#define __HCC_OR_HIP_CLANG__ 1 -#define __HCC_ONLY__ 1 -#define __HIP_CLANG_ONLY__ 0 -#elif defined(__clang__) && defined(__HIP__) -#define __HCC_OR_HIP_CLANG__ 1 -#define __HCC_ONLY__ 0 -#define __HIP_CLANG_ONLY__ 1 -#else -#define __HCC_OR_HIP_CLANG__ 0 -#define __HCC_ONLY__ 0 -#define __HIP_CLANG_ONLY__ 0 -#endif - -//--- -// Top part of file can be compiled with any compiler - -//#include -#if __cplusplus -#include -#else -#include -#include -#include -#endif //__cplusplus - -#if __HCC_OR_HIP_CLANG__ - -#define CUDA_SUCCESS hipSuccess - -#include -#endif // __HCC_OR_HIP_CLANG__ - -#if __HCC__ -// define HIP_ENABLE_PRINTF to enable printf -#ifdef HIP_ENABLE_PRINTF -#define HCC_ENABLE_ACCELERATOR_PRINTF 1 -#endif - -//--- -// Remainder of this file only compiles with HCC -#if defined __HCC__ -#include "grid_launch.h" -#include "hc_printf.hpp" -// TODO-HCC-GL - change this to typedef. -// typedef grid_launch_parm hipLaunchParm ; - -#if GENERIC_GRID_LAUNCH == 0 -#define hipLaunchParm grid_launch_parm -#else -namespace hip_impl { -struct Empty_launch_parm {}; -} // namespace hip_impl -#define hipLaunchParm hip_impl::Empty_launch_parm -#endif // GENERIC_GRID_LAUNCH - -#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1 -#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. -#error(HCC must support GRID_LAUNCH_20) -#endif // GRID_LAUNCH_VERSION - -#endif // HCC - -#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__ -#include "grid_launch_GGL.hpp" -#endif // GENERIC_GRID_LAUNCH - -#endif // HCC - -#if __HCC_OR_HIP_CLANG__ -extern int HIP_TRACE_API; - -#ifdef __cplusplus -#include -#endif -#include -#include -#include -#include -#include -#if __HCC__ - #include -#endif -// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. 
-#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) -#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__ -#endif - -// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call. -#if __HIP_DEVICE_COMPILE__ == 1 -#undef assert -#define assert(COND) \ - { \ - if (!(COND)) { \ - abort(); \ - } \ - } -#endif - - -// Feature tests: -#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ -// Device compile and not host compile: - -// 32-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) -#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) -#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) - -// 64-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) -#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) - -// Doubles -#define __HIP_ARCH_HAS_DOUBLES__ (1) - -// warp cross-lane operations: -#define __HIP_ARCH_HAS_WARP_VOTE__ (1) -#define __HIP_ARCH_HAS_WARP_BALLOT__ (1) -#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1) -#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) - -// sync -#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) -#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) - -// misc -#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) -#define __HIP_ARCH_HAS_3DGRID__ (1) -#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) - -#endif /* Device feature flags */ - - -#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \ - __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) -#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \ - __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \ - amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) -#define select_impl_(_1, _2, impl_, ...) impl_ -#define __launch_bounds__(...) 
\ - select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) - -// Detect if we are compiling C++ mode or C mode -#if defined(__cplusplus) -#define __HCC_CPP__ -#elif defined(__STDC_VERSION__) -#define __HCC_C__ -#endif - -__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } - -#if __HIP_ARCH_GFX701__ == 0 - -__device__ unsigned __hip_ds_bpermute(int index, unsigned src); -__device__ float __hip_ds_bpermutef(int index, float src); -__device__ unsigned __hip_ds_permute(int index, unsigned src); -__device__ float __hip_ds_permutef(int index, float src); - -__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern); -__device__ float __hip_ds_swizzlef(float src, int pattern); - -__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl); - -#endif //__HIP_ARCH_GFX803__ == 1 - -#endif // __HCC_OR_HIP_CLANG__ - -#if defined __HCC__ - -template < - typename std::common_type::type f> -class Coordinates { - using R = decltype(f(0)); - - struct X { - __device__ operator R() const { return f(0); } - }; - struct Y { - __device__ operator R() const { return f(1); } - }; - struct Z { - __device__ operator R() const { return f(2); } - }; - - public: - static constexpr X x{}; - static constexpr Y y{}; - static constexpr Z z{}; -}; - -static constexpr Coordinates blockDim; -static constexpr Coordinates blockIdx; -static constexpr Coordinates gridDim; -static constexpr Coordinates threadIdx; - -#define hipThreadIdx_x (hc_get_workitem_id(0)) -#define hipThreadIdx_y (hc_get_workitem_id(1)) -#define hipThreadIdx_z (hc_get_workitem_id(2)) - -#define hipBlockIdx_x (hc_get_group_id(0)) -#define hipBlockIdx_y (hc_get_group_id(1)) -#define hipBlockIdx_z (hc_get_group_id(2)) - -#define hipBlockDim_x (hc_get_group_size(0)) -#define hipBlockDim_y (hc_get_group_size(1)) -#define hipBlockDim_z (hc_get_group_size(2)) - -#define hipGridDim_x (hc_get_num_groups(0)) -#define hipGridDim_y (hc_get_num_groups(1)) 
-#define hipGridDim_z (hc_get_num_groups(2)) - -#endif // defined __HCC__ -#if __HCC_OR_HIP_CLANG__ -extern "C" __device__ void* __hip_malloc(size_t); -extern "C" __device__ void* __hip_free(void* ptr); - -static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } -static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } - -#if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF) -template -static inline __device__ void printf(const char* format, All... all) { - hc::printf(format, all...); -} -#elif defined(__HCC_ACCELERATOR__) || __HIP__ -template -static inline __device__ void printf(const char* format, All... all) {} -#endif - -#endif //__HCC_OR_HIP_CLANG__ - -#ifdef __HCC__ - -#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) - -#define HIP_KERNEL_NAME(...) (__VA_ARGS__) -#define HIP_SYMBOL(X) #X - -#if defined __HCC_CPP__ -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, - grid_launch_parm* lp, const char* kernelNameStr); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, - grid_launch_parm* lp, const char* kernelNameStr); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, - grid_launch_parm* lp, const char* kernelNameStr); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, - grid_launch_parm* lp, const char* kernelNameStr); -extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp); - -#if GENERIC_GRID_LAUNCH == 0 -//#warning "Original hipLaunchKernel defined" -// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be -// either size_t or dim3 types -#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) 
\ - do { \ - grid_launch_parm lp; \ - lp.dynamic_group_mem_bytes = _groupMemBytes; \ - hipStream_t trueStream = \ - (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \ - _kernelName(lp, ##__VA_ARGS__); \ - ihipPostLaunchKernel(#_kernelName, trueStream, lp); \ - } while (0) -#endif // GENERIC_GRID_LAUNCH - -#elif defined(__HCC_C__) - -// TODO - develop C interface. - -#endif //__HCC_CPP__ - -/** - * @defgroup HIP-ENV HIP Environment Variables - * @{ - */ -// extern int HIP_PRINT_ENV ; ///< Print all HIP-related environment variables. -// extern int HIP_TRACE_API; ///< Trace HIP APIs. -// extern int HIP_LAUNCH_BLOCKING ; ///< Make all HIP APIs host-synchronous - -/** - * @} - */ - - -// End doxygen API: -/** - * @} - */ - -// -// hip-clang functions -// -#elif defined(__clang__) && defined(__HIP__) - -#define HIP_KERNEL_NAME(...) __VA_ARGS__ -#define HIP_SYMBOL(X) #X - -typedef int hipLaunchParm; - -template -inline void hipLaunchKernelGGL(F&& kernelName, const dim3& numblocks, const dim3& numthreads, - unsigned memperblock, hipStream_t streamId, Args... args) { - kernelName<<>>(args...); -} - -template -inline void hipLaunchKernel(F&& kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t groupMemBytes, hipStream_t stream, Args... 
args) { - hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, - std::move(args)...); -} - -#include - -#pragma push_macro("__DEVICE__") -#define __DEVICE__ static __device__ __forceinline__ - -extern "C" __device__ size_t __ockl_get_local_id(uint); -__DEVICE__ uint __hip_get_thread_idx_x() { return __ockl_get_local_id(0); } -__DEVICE__ uint __hip_get_thread_idx_y() { return __ockl_get_local_id(1); } -__DEVICE__ uint __hip_get_thread_idx_z() { return __ockl_get_local_id(2); } - -extern "C" __device__ size_t __ockl_get_group_id(uint); -__DEVICE__ uint __hip_get_block_idx_x() { return __ockl_get_group_id(0); } -__DEVICE__ uint __hip_get_block_idx_y() { return __ockl_get_group_id(1); } -__DEVICE__ uint __hip_get_block_idx_z() { return __ockl_get_group_id(2); } - -extern "C" __device__ size_t __ockl_get_local_size(uint); -__DEVICE__ uint __hip_get_block_dim_x() { return __ockl_get_local_size(0); } -__DEVICE__ uint __hip_get_block_dim_y() { return __ockl_get_local_size(1); } -__DEVICE__ uint __hip_get_block_dim_z() { return __ockl_get_local_size(2); } - -extern "C" __device__ size_t __ockl_get_num_groups(uint); -__DEVICE__ uint __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); } -__DEVICE__ uint __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); } -__DEVICE__ uint __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); } - -#define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \ - __declspec(property(get = __get_##DIMENSION)) uint DIMENSION; \ - __DEVICE__ uint __get_##DIMENSION(void) { \ - return FUNCTION; \ - } - -struct __hip_builtin_threadIdx_t { - __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x()); - __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y()); - __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z()); -}; - -struct __hip_builtin_blockIdx_t { - __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x()); - __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y()); - __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z()); -}; - -struct 
__hip_builtin_blockDim_t { - __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x()); - __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y()); - __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z()); -}; - -struct __hip_builtin_gridDim_t { - __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x()); - __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y()); - __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z()); -}; - -#undef __HIP_DEVICE_BUILTIN -#pragma pop_macro("__DEVICE__") - -extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx; -extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx; -extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim; -extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; - - -#define hipThreadIdx_x threadIdx.x -#define hipThreadIdx_y threadIdx.y -#define hipThreadIdx_z threadIdx.z - -#define hipBlockIdx_x blockIdx.x -#define hipBlockIdx_y blockIdx.y -#define hipBlockIdx_z blockIdx.z - -#define hipBlockDim_x blockDim.x -#define hipBlockDim_y blockDim.y -#define hipBlockDim_z blockDim.z - -#define hipGridDim_x gridDim.x -#define hipGridDim_y gridDim.y -#define hipGridDim_z gridDim.z - -#include - -#if __HIP_HCC_COMPAT_MODE__ -// Define HCC work item functions in terms of HIP builtin variables. 
-#pragma push_macro("__DEFINE_HCC_FUNC") -#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \ -inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \ - if (i==0) \ - return hip_var.x; \ - else if(i==1) \ - return hip_var.y; \ - else \ - return hip_var.z; \ -} - -__DEFINE_HCC_FUNC(workitem_id, threadIdx) -__DEFINE_HCC_FUNC(group_id, blockIdx) -__DEFINE_HCC_FUNC(group_size, blockDim) -__DEFINE_HCC_FUNC(num_groups, gridDim) -#pragma pop_macro("__DEFINE_HCC_FUNC") - -extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint); -inline __device__ __attribute__((always_inline)) uint -hc_get_workitem_absolute_id(int dim) -{ - return (uint)__ockl_get_global_id(dim); -} - -#endif - -// Support std::complex. -#pragma push_macro("__CUDA__") -#define __CUDA__ -#include <__clang_cuda_math_forward_declares.h> -#include <__clang_cuda_complex_builtins.h> -#include -#include -#include -#undef __CUDA__ -#pragma pop_macro("__CUDA__") - - -hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, - uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t localWorkSizeX, uint32_t localWorkSizeY, - uint32_t localWorkSizeZ, size_t sharedMemBytes, - hipStream_t hStream, void** kernelParams, void** extra, - hipEvent_t startEvent = nullptr, - hipEvent_t stopEvent = nullptr); - -#endif // defined(__clang__) && defined(__HIP__) - -#include - -#endif // HIP_HCC_DETAIL_RUNTIME_H diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h b/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h deleted file mode 100644 index b6ae88729..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_runtime_api.h +++ /dev/null @@ -1,2860 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//#pragma once -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H -/** - * @file hcc_detail/hip_runtime_api.h - * @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or - * special language extensions (-hc mode) ; those functions in hip_runtime.h. 
- */ -#include -#include - -#ifndef GENERIC_GRID_LAUNCH -#define GENERIC_GRID_LAUNCH 1 -#endif - -#include -#include -#include -#include -#include - -#if defined(_MSC_VER) -#define DEPRECATED(msg) __declspec(deprecated(msg)) -#else // !defined(_MSC_VER) -#define DEPRECATED(msg) __attribute__ ((deprecated(msg))) -#endif // !defined(_MSC_VER) - -#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list" - -#if defined(__HCC__) && (__hcc_workweek__ < 16155) -#error("This version of HIP requires a newer version of HCC."); -#endif - -#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) -#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) -#define HIP_LAUNCH_PARAM_END ((void*)0x03) - -#ifdef __cplusplus - #define __dparm(x) \ - = x -#else - #define __dparm(x) -#endif - -// Structure definitions: -#ifdef __cplusplus -extern "C" { -#endif - -//--- -// API-visible structures -typedef struct ihipCtx_t* hipCtx_t; - -// Note many APIs also use integer deviceIds as an alternative to the device pointer: -typedef int hipDevice_t; - -typedef struct ihipStream_t* hipStream_t; - -// TODO: IPC implementation - -#define hipIpcMemLazyEnablePeerAccess 0 - -#define HIP_IPC_HANDLE_SIZE 64 - -typedef struct hipIpcMemHandle_st { - char reserved[HIP_IPC_HANDLE_SIZE]; -} hipIpcMemHandle_t; - -// TODO: IPC event handle currently unsupported -struct ihipIpcEventHandle_t; -typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; - - -// END TODO - -typedef struct ihipModule_t* hipModule_t; - -typedef struct ihipModuleSymbol_t* hipFunction_t; - -typedef struct hipFuncAttributes { - int binaryVersion; - int cacheModeCA; - size_t constSizeBytes; - size_t localSizeBytes; - int maxDynamicSharedSizeBytes; - int maxThreadsPerBlock; - int numRegs; - int preferredShmemCarveout; - int ptxVersion; - size_t sharedSizeBytes; -} 
hipFuncAttributes; - -typedef struct ihipEvent_t* hipEvent_t; - -enum hipLimit_t { - hipLimitMallocHeapSize = 0x02, -}; - -/** - * @addtogroup GlobalDefs More - * @{ - */ -//! Flags that can be used with hipStreamCreateWithFlags -#define hipStreamDefault \ - 0x00 ///< Default stream creation flags. These are used with hipStreamCreate(). -#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream - - -//! Flags that can be used with hipEventCreateWithFlags: -#define hipEventDefault 0x0 ///< Default flags -#define hipEventBlockingSync \ - 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. -#define hipEventDisableTiming \ - 0x2 ///< Disable event's capability to record timing information. May improve performance. -#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. -#define hipEventReleaseToDevice \ - 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to - /// obtain more precise timings of commands between events. The flag is a no-op on - /// CUDA platforms. -#define hipEventReleaseToSystem \ - 0x80000000 /// < Use a system-scope release that when recording this event. This flag is - /// useful to make non-coherent host memory visible to the host. The flag is a - /// no-op on CUDA platforms. - - -//! Flags that can be used with hipHostMalloc -#define hipHostMallocDefault 0x0 -#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts. -#define hipHostMallocMapped \ - 0x2 ///< Map the allocation into the address space for the current device. The device pointer - ///< can be obtained with #hipHostGetDevicePointer. -#define hipHostMallocWriteCombined 0x4 -#define hipHostMallocCoherent \ - 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific - ///< allocation. -#define hipHostMallocNonCoherent \ - 0x80000000 ///< Allocate non-coherent memory. 
Overrides HIP_COHERENT_HOST_ALLOC for specific - ///< allocation. - - -//! Flags that can be used with hipHostRegister -#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable -#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. -#define hipHostRegisterMapped \ - 0x2 ///< Map the allocation into the address space for the current device. The device pointer - ///< can be obtained with #hipHostGetDevicePointer. -#define hipHostRegisterIoMemory 0x4 ///< Not supported. - - -#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield -#define hipDeviceScheduleSpin \ - 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and - ///< may consume more power. -#define hipDeviceScheduleYield \ - 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers - ///< power and is friendlier to other threads in the system. -#define hipDeviceScheduleBlockingSync 0x4 -#define hipDeviceScheduleMask 0x7 - -#define hipDeviceMapHost 0x8 -#define hipDeviceLmemResizeToMax 0x16 - -#define hipArrayDefault 0x00 ///< Default HIP array allocation flag -#define hipArrayLayered 0x01 -#define hipArraySurfaceLoadStore 0x02 -#define hipArrayCubemap 0x04 -#define hipArrayTextureGather 0x08 - -/* - * @brief hipJitOption - * @enum - * @ingroup Enumerations - */ -typedef enum hipJitOption { - hipJitOptionMaxRegisters = 0, - hipJitOptionThreadsPerBlock, - hipJitOptionWallTime, - hipJitOptionInfoLogBuffer, - hipJitOptionInfoLogBufferSizeBytes, - hipJitOptionErrorLogBuffer, - hipJitOptionErrorLogBufferSizeBytes, - hipJitOptionOptimizationLevel, - hipJitOptionTargetFromContext, - hipJitOptionTarget, - hipJitOptionFallbackStrategy, - hipJitOptionGenerateDebugInfo, - hipJitOptionLogVerbose, - hipJitOptionGenerateLineInfo, - hipJitOptionCacheMode, - hipJitOptionSm3xOpt, - hipJitOptionFastCompile, - hipJitOptionNumOptions -} hipJitOption; - - -/** - * @warning On AMD 
devices and some Nvidia devices, these hints and controls are ignored. - */ -typedef enum hipFuncCache_t { - hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) - hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache - hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory - hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory -} hipFuncCache_t; - - -/** - * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. - */ -typedef enum hipSharedMemConfig { - hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. - hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-bytes intervals and performs best - ///< when adjacent threads access data 4 bytes apart. - hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best - ///< when adjacent threads access data 4 bytes apart. -} hipSharedMemConfig; - - -/** - * Struct for data in 3D - * - */ -typedef struct dim3 { - uint32_t x; ///< x - uint32_t y; ///< y - uint32_t z; ///< z -#ifdef __cplusplus - dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){}; -#endif -} dim3; - - -// Doxygen end group GlobalDefs -/** @} */ - - -//------------------------------------------------------------------------------------------------- - - -// The handle allows the async commands to use the stream even if the parent hipStream_t goes -// out-of-scope. -// typedef class ihipStream_t * hipStream_t; - - -/* - * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the - * surrounding hipEvent_t goes out-of-scope. 
This is handy for cases where the hipEvent_t goes - * out-of-scope but the true event is being written by some async queue or device */ -// typedef struct hipEvent_t { -// struct ihipEvent_t *_handle; -//} hipEvent_t; - - -/** - * @defgroup API HIP API - * @{ - * - * Defines the HIP API. See the individual sections for more information. - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Device Device Management - * @{ - */ - -/** - * @brief Waits on all active streams on current device - * - * When this command is invoked, the host thread gets blocked until all the commands associated - * with streams associated with the device. HIP does not support multiple blocking modes (yet!). - * - * @returns #hipSuccess - * - * @see hipSetDevice, hipDeviceReset - */ -hipError_t hipDeviceSynchronize(void); - - -/** - * @brief The state of current device is discarded and updated to a fresh state. - * - * Calling this function deletes all streams created, memory allocated, kernels running, events - * created. Make sure that no other thread is using the device or streams, memory, kernels, events - * associated with the current device. - * - * @returns #hipSuccess - * - * @see hipDeviceSynchronize - */ -hipError_t hipDeviceReset(void); - - -/** - * @brief Set default device to be used for subsequent hip API calls from this thread. - * - * @param[in] deviceId Valid device in range 0...hipGetDeviceCount(). - * - * Sets @p device as the default device for the calling host thread. Valid device id's are 0... - * (hipGetDeviceCount()-1). - * - * Many HIP APIs implicitly use the "default device" : - * - * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be - * allocated on device. - * - Any streams or events created from this host thread will be associated with device. 
- * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device - * (unless a specific stream is specified, in which case the device associated with that stream will - * be used). - * - * This function may be called from any host thread. Multiple host threads may use the same device. - * This function does no synchronization with the previous or new device, and has very little - * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before - * making a HIP runtime call which uses the default device. - * - * The default device is stored in thread-local-storage for each thread. - * Thread-pool implementations may inherit the default device of the previous thread. A good - * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known - * standard device. - * - * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse - * - * @see hipGetDevice, hipGetDeviceCount - */ -hipError_t hipSetDevice(int deviceId); - - -/** - * @brief Return the default device id for the calling host thread. - * - * @param [out] device *device is written with the default device - * - * HIP maintains an default device for each thread using thread-local-storage. - * This device is used implicitly for HIP runtime APIs called by this thread. - * hipGetDevice returns in * @p device the default device for the calling host thread. - * - * @returns #hipSuccess - * - * @see hipSetDevice, hipGetDevicesizeBytes - */ -hipError_t hipGetDevice(int* deviceId); - - -/** - * @brief Return number of compute-capable devices. - * - * @param [output] count Returns number of compute-capable devices. - * - * @returns #hipSuccess, #hipErrorNoDevice - * - * - * Returns in @p *count the number of devices that have ability to run compute commands. If there - * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. 
If 1 or more - * devices can be found, then hipGetDeviceCount returns #hipSuccess. - */ -hipError_t hipGetDeviceCount(int* count); - -/** - * @brief Query for a specific device attribute. - * - * @param [out] pi pointer to value to return - * @param [in] attr attribute to query - * @param [in] deviceId which device to query for information - * - * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue - */ -hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); - -/** - * @brief Returns device properties. - * - * @param [out] prop written with device properties - * @param [in] deviceId which device to query for information - * - * @return #hipSuccess, #hipErrorInvalidDevice - * @bug HCC always returns 0 for maxThreadsPerMultiProcessor - * @bug HCC always returns 0 for regsPerBlock - * @bug HCC always returns 0 for l2CacheSize - * - * Populates hipGetDeviceProperties with information for the specified device. - */ -hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); - - -/** - * @brief Set L1/Shared cache partition. - * - * @param [in] cacheConfig - * - * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored - * on those architectures. - * - */ -hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); - - -/** - * @brief Set Cache configuration for a specific function - * - * @param [in] cacheConfig - * - * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored - * on those architectures. 
- * - */ -hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); - -/** - * @brief Get Resource limits of current device - * - * @param [out] pValue - * @param [in] limit - * - * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue - * Note: Currently, only hipLimitMallocHeapSize is available - * - */ -hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); - - -/** - * @brief Set Cache configuration for a specific function - * - * @param [in] config; - * - * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored - * on those architectures. - * - */ -hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); - -/** - * @brief Returns bank width of shared memory for current device - * - * @param [out] pConfig - * - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError - * - * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is - * ignored on those architectures. - * - */ -hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); - - -/** - * @brief The bank width of shared memory on current device is set - * - * @param [in] config - * - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError - * - * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is - * ignored on those architectures. - * - */ -hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); - -/** - * @brief The current device behavior is changed according the flags passed. - * - * @param [in] flags - * - * The schedule flags impact how HIP waits for the completion of a command running on a device. - * hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the - * work until the command completes. This offers the lowest latency, but will consume a CPU core - * and may increase power. 
hipDeviceScheduleYield : The HIP runtime will yield the CPU to - * system so that other tasks can use it. This may increase latency to detect the completion but - * will consume less power and is friendlier to other tasks in the system. - * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. - * hipDeviceScheduleAuto : Use a hueristic to select between Spin and Yield modes. If the - * number of HIP contexts is greater than the number of logical processors in the system, use Spin - * scheduling. Else use Yield scheduling. - * - * - * hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and - * the flag is ignored. hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag. - * - * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess - * - * - */ -hipError_t hipSetDeviceFlags(unsigned flags); - -/** - * @brief Device which matches hipDeviceProp_t is returned - * - * @param [out] device ID - * @param [in] device properties pointer - * - * @returns #hipSuccess, #hipErrorInvalidValue - */ -hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); - -// end doxygen Device -/** - * @} - */ - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Error Error Handling - * @{ - */ - -/** - * @brief Return last error returned by any HIP runtime API call and resets the stored error code to - * #hipSuccess - * - * @returns return code from last HIP called from the active host thread - * - * Returns the last error that has been returned by any of the runtime calls in the same host - * thread, and then resets the saved error to #hipSuccess. 
- * - * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t - */ -hipError_t hipGetLastError(void); - - -/** - * @brief Return last error returned by any HIP runtime API call. - * - * @return #hipSuccess - * - * Returns the last error that has been returned by any of the runtime calls in the same host - * thread. Unlike hipGetLastError, this function does not reset the saved error code. - * - * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t - */ -hipError_t hipPeekAtLastError(void); - - -/** - * @brief Return name of the specified error code in text form. - * - * @param hip_error Error code to convert to name. - * @return const char pointer to the NULL-terminated error name - * - * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t - */ -const char* hipGetErrorName(hipError_t hip_error); - - -/** - * @brief Return handy text string message to explain the error which occurred - * - * @param hipError Error code to convert to string. - * @return const char pointer to the NULL-terminated error string - * - * @warning : on HCC, this function returns the name of the error (same as hipGetErrorName) - * - * @see hipGetErrorName, hipGetLastError, hipPeakAtLastError, hipError_t - */ -const char* hipGetErrorString(hipError_t hipError); - -// end doxygen Error -/** - * @} - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Stream Stream Management - * @{ - * - * The following Stream APIs are not (yet) supported in HIP: - * - cudaStreamAttachMemAsync - */ - - -/** - * @brief Create an asynchronous stream. - * - * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the - * newly created stream. - * @return #hipSuccess, #hipErrorInvalidValue - * - * Create a new asynchronous stream. 
@p stream returns an opaque handle that can be used to - * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on - * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory - * used by the stream, the application must call hipStreamDestroy. - * - * @return #hipSuccess, #hipErrorInvalidValue - * - * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy - */ -hipError_t hipStreamCreate(hipStream_t* stream); - - -/** - * @brief Create an asynchronous stream. - * - * @param[in, out] stream Pointer to new stream - * @param[in ] flags to control stream creation. - * @return #hipSuccess, #hipErrorInvalidValue - * - * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to - * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on - * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory - * used by the stream, the application must call hipStreamDestroy. Flags controls behavior of the - * stream. See #hipStreamDefault, #hipStreamNonBlocking. - * - * - * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy - */ - -hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); - - -/** - * @brief Create an asynchronous stream with the specified priority. - * - * @param[in, out] stream Pointer to new stream - * @param[in ] flags to control stream creation. - * @param[in ] priority of the stream. Lower numbers represent higher priorities. - * @return #hipSuccess, #hipErrorInvalidValue - * - * Create a new asynchronous stream with the specified priority. @p stream returns an opaque handle - * that can be used to reference the newly created stream in subsequent hipStream* commands.
The - * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope. - * To release the memory used by the stream, the application must call hipStreamDestroy. Flags controls - * behavior of the stream. See #hipStreamDefault, #hipStreamNonBlocking. - * - * - * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy - */ - -hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); - - -/** - * @brief Returns numerical values that correspond to the least and greatest stream priority. - * - * @param[in, out] leastPriority pointer in which value corresponding to least priority is returned. - * @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned. - * - * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least - * and greatest stream priority respectively. Stream priorities follow a convention where lower numbers - * imply greater priorities. The range of meaningful stream priorities is given by - * [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value - * that is outside the meaningful range as specified by this API, the priority is automatically - * clamped to within the valid range. - */ - -hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); - - -/** - * @brief Destroys the specified stream. - * - * @param[in] stream Stream to destroy (passed by value), previously created by hipStreamCreate, - * hipStreamCreateWithFlags or hipStreamCreateWithPriority. - * @return #hipSuccess #hipErrorInvalidResourceHandle - * - * Destroys the specified stream. - * - * If commands are still executing on the specified stream, some may complete execution before the - * queue is deleted. - * - * The queue may be destroyed while some commands are still inflight, or may wait for all commands - * queued to the stream before destroying it.
- * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent, - * hipStreamSynchronize - */ -hipError_t hipStreamDestroy(hipStream_t stream); - - -/** - * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or - * #hipErrorNotReady if not. - * - * @param[in] stream stream to query - * - * @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidResourceHandle - * - * This is thread-safe and returns a snapshot of the current state of the queue. However, if other - * host threads are sending work to the stream, the status may change immediately after the function - * is called. It is typically used for debug. - * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize, - * hipStreamDestroy - */ -hipError_t hipStreamQuery(hipStream_t stream); - - -/** - * @brief Wait for all commands in stream to complete. - * - * @param[in] stream stream identifier. - * - * @return #hipSuccess, #hipErrorInvalidResourceHandle - * - * This command is host-synchronous : the host will block until the specified stream is empty. - * - * This command follows standard null-stream semantics. Specifically, specifying the null stream - * will cause the command to wait for other streams on the same device to complete all pending - * operations. - * - * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active - * or blocking. - * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy - * - */ -hipError_t hipStreamSynchronize(hipStream_t stream); - - -/** - * @brief Make the specified compute stream wait for an event - * - * @param[in] stream stream to make wait. 
- * @param[in] event event to wait on - * @param[in] flags control operation [must be 0] - * - * @return #hipSuccess, #hipErrorInvalidResourceHandle - * - * This function inserts a wait operation into the specified stream. - * All future work submitted to @p stream will wait until @p event reports completion before - * beginning execution. - * - * This function only waits for commands in the current stream to complete. Notably, this function - * does not implicitly wait for commands in the default stream to complete, even if the specified - * stream is created with hipStreamNonBlocking = 0. - * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy - */ -hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags); - - -/** - * @brief Return flags associated with this stream. - * - * @param[in] stream stream to be queried - * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle - * - * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidResourceHandle - * - * Return flags associated with this stream in *@p flags. - * - * @see hipStreamCreateWithFlags - */ -hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); - - -/** - * @brief Query the priority of a stream. - * - * @param[in] stream stream to be queried - * @param[in,out] priority Pointer to an integer in which the stream's priority is returned - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle - * - * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidResourceHandle - * - * Query the priority of a stream. The priority is returned in *@p priority.
- * - * @see hipStreamCreateWithFlags - */ -hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); - - -/** - * Stream CallBack struct - */ -typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); - -/** - * @brief Adds a callback to be called on the host after all currently enqueued - * items in the stream have completed. For each - * cudaStreamAddCallback call, a callback will be executed exactly once. - * The callback will block later work in the stream until it is finished. - * @param[in] stream - Stream to add callback to - * @param[in] callback - The function to call once preceding stream operations are complete - * @param[in] userData - User specified data to be passed to the callback function - * @param[in] flags - Reserved for future use, must be 0 - * @return #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotSupported - * - * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, - * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority - * - */ -hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, - unsigned int flags); - - -// end doxygen Stream -/** - * @} - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Event Event Management - * @{ - */ - -/** - * @brief Create an event with the specified flags - * - * @param[in,out] event Returns the newly created event. - * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, - #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess - - * #hipEventDefault : Default flag. The event will use active synchronization and will support - timing. 
Active synchronization provides lowest possible latency at the expense of dedicating a - CPU to poll on the event. - * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is - called on this event, the thread will block until the event completes. This can increase latency - for the synchronization but can result in lower power and more resources for other CPU threads. - * #hipEventDisableTiming : Disable recording of timing information. On ROCM platform, timing - information is always recorded and this flag has no performance benefit. - - * @warning On HCC platform, hipEventInterprocess support is under development. Use of this flag - will return an error. - * - * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, - #hipErrorLaunchFailure, #hipErrorMemoryAllocation - * - * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime - */ -hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); - - -/** - * Create an event - * - * @param[in,out] event Returns the newly created event. - * - * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, - * #hipErrorLaunchFailure, #hipErrorMemoryAllocation - * - * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, - * hipEventDestroy, hipEventElapsedTime - */ -hipError_t hipEventCreate(hipEvent_t* event); - - -/** - * @brief Record an event in the specified stream. - * - * @param[in] event event to record. - * @param[in] stream stream in which to record event. - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError, - * #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure - * - * hipEventQuery() or hipEventSynchronize() must be used to determine when the event - * transitions from "recording" (after hipEventRecord() is called) to "recorded" - * (when timestamps are set, if requested).
- * - * Events which are recorded in a non-NULL stream will transition - * from "recording" to "recorded" state when they reach the head of - * the specified stream, after all previous - * commands in that stream have completed executing. - * - * If hipEventRecord() has been previously called on this event, then this call will overwrite any - * existing state in event. - * - * If this function is called on an event that is currently being recorded, results are undefined - * - either outstanding recording may save state into the event, and the order is not guaranteed. - * - * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, - * hipEventDestroy, hipEventElapsedTime - * - */ -#ifdef __cplusplus -hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); -#else -hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); -#endif - -/** - * @brief Destroy the specified event. - * - * @param[in] event Event to destroy. - * @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue, - * #hipErrorLaunchFailure - * - * Releases memory associated with the event. If the event is recording but has not completed - * recording when hipEventDestroy() is called, the function will return immediately and the - * completion_future resources will be released later, when the hipDevice is synchronized. - * - * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, - * hipEventElapsedTime - * - * @returns #hipSuccess - */ -hipError_t hipEventDestroy(hipEvent_t event); - - -/** - * @brief Wait for an event to complete. - * - * This function will block until the event is ready, waiting for all previous work in the stream - * specified when event was recorded with hipEventRecord(). - * - * If hipEventRecord() has not been called on @p event, this function returns immediately. - * - * TODO-hcc - This function needs to support hipEventBlockingSync parameter.
- * - * @param[in] event Event on which to wait. - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError, - * #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure - * - * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, - * hipEventElapsedTime - */ -hipError_t hipEventSynchronize(hipEvent_t event); - - -/** - * @brief Return the elapsed time between two events. - * - * @param[out] ms : Return time between start and stop in ms. - * @param[in] start : Start event. - * @param[in] stop : Stop event. - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidResourceHandle, - * #hipErrorInitializationError, #hipErrorLaunchFailure - * - * Computes the elapsed time between two events. Time is computed in ms, with - * a resolution of approximately 1 us. - * - * Events which are recorded in a NULL stream will block until all commands - * on all other streams complete execution, and then record the timestamp. - * - * Events which are recorded in a non-NULL stream will record their timestamp - * when they reach the head of the specified stream, after all previous - * commands in that stream have completed executing. Thus the time that - * the event recorded may be significantly after the host calls hipEventRecord(). - * - * If hipEventRecord() has not been called on either event, then #hipErrorInvalidResourceHandle is - * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been - * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at - * least one of the events), then #hipErrorNotReady is returned. - * - * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, - * hipEventSynchronize - */ -hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); - - -/** - * @brief Query event status - * - * @param[in] event Event to query. 
- * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue, - * #hipErrorInitializationError, #hipErrorLaunchFailure - * - * Query the status of the specified event. This function will return #hipSuccess if all - * commands in the appropriate stream (specified to hipEventRecord()) have completed, or if - * hipEventRecord() was not called on the event. If that work has not completed, then - * #hipErrorNotReady is returned. - * - * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, - * hipEventSynchronize, hipEventElapsedTime - */ -hipError_t hipEventQuery(hipEvent_t event); - - -// end doxygen Events -/** - * @} - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Memory Memory Management - * @{ - * - * The following CUDA APIs are not currently supported: - * - cudaMalloc3D - * - cudaMalloc3DArray - * - TODO - more 2D, 3D, array APIs here. - * - * - */ - - -/** - * @brief Return attributes for the specified pointer - * - * @param[out] attributes for the specified pointer - * @param[in] pointer to get attributes for - * - * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue - * - * @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice - */ -hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); - -/** - * @brief Allocate memory on the default accelerator - * - * @param[out] ptr Pointer to the allocated memory - * @param[in] size Requested memory size - * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- * - * @return #hipSuccess, #hipErrorMemoryAllocation, #hipErrorInvalidValue (bad context, null *ptr) - * - * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, - * hipHostFree, hipHostMalloc - */ -hipError_t hipMalloc(void** ptr, size_t size); - -/** - * @brief Allocate pinned host memory [Deprecated] - * - * @param[out] ptr Pointer to the allocated host pinned memory - * @param[in] size Requested memory size - * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. - * - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @deprecated use hipHostMalloc() instead - */ -DEPRECATED("use hipHostMalloc instead") -hipError_t hipMallocHost(void** ptr, size_t size); - -/** - * @brief Allocate device accessible page locked host memory - * - * @param[out] ptr Pointer to the allocated host pinned memory - * @param[in] size Requested memory size - * @param[in] flags Type of host memory allocation - * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. - * - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @see hipSetDeviceFlags, hipHostFree - */ -hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); - -/** - * @brief Allocate device accessible page locked host memory [Deprecated] - * - * @param[out] ptr Pointer to the allocated host pinned memory - * @param[in] size Requested memory size - * @param[in] flags Type of host memory allocation - * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
- * - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @deprecated use hipHostMalloc() instead - */ -DEPRECATED("use hipHostMalloc instead") -hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); - -/** - * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc - * - * @param[out] devPtr Device Pointer mapped to passed host pointer - * @param[in] hstPtr Host Pointer allocated through hipHostMalloc - * @param[in] flags Flags to be passed for extension - * - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation - * - * @see hipSetDeviceFlags, hipHostMalloc - */ -hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); - -/** - * @brief Return flags associated with host pointer - * - * @param[out] flagsPtr Memory location to store flags - * @param[in] hostPtr Host Pointer allocated through hipHostMalloc - * @return #hipSuccess, #hipErrorInvalidValue - * - * @see hipHostMalloc - */ -hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); - -/** - * @brief Register host memory so it can be accessed from the current device. - * - * @param[in] hostPtr Pointer to host memory to be registered. - * @param[in] sizeBytes size of the host memory - * @param[in] flags. See below. - * - * Flags: - * - #hipHostRegisterDefault Memory is Mapped and Portable - * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports - * one context so this is always assumed true. - * - #hipHostRegisterMapped Map the allocation into the address space for the current device. - * The device pointer can be obtained with #hipHostGetDevicePointer. - * - * - * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer. - * On many systems, the mapped device pointer will have a different value than the mapped host - * pointer. Applications must use the device pointer in device code, and the host pointer in host - * code.
- * - * On some systems, registered memory is pinned. On some systems, registered memory may not - * actually be pinned but use OS or hardware facilities to allow GPU access to the host memory. - * - * Developers are strongly encouraged to register memory blocks which are aligned to the host - * cache-line size (typically 64 bytes, but can be obtained from the CPUID instruction). - * - * If registering non-aligned pointers, the application must take care when registering pointers from - * the same cache line on different devices. HIP's coarse-grained synchronization model does not - * guarantee correct results if different devices write to different parts of the same cache block - - * typically one of the writes will "win" and overwrite data from the other registered memory - * region. - * - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer - */ -hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); - -/** - * @brief Un-register host pointer - * - * @param[in] hostPtr Host pointer previously registered with #hipHostRegister - * @return Error code - * - * @see hipHostRegister - */ -hipError_t hipHostUnregister(void* hostPtr); - -/** - * Allocates at least width (in bytes) * height bytes of linear memory - * Padding may occur to ensure alignment requirements are met for the given row - * The change in width size due to padding will be returned in *pitch. - * Currently the alignment is set to 128 bytes - * - * @param[out] ptr Pointer to the allocated device memory - * @param[out] pitch Pitch for allocation (in bytes) - * @param[in] width Requested pitched allocation width (in bytes) - * @param[in] height Requested pitched allocation height - * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- * - * @return Error code - * - * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, - * hipMalloc3DArray, hipHostMalloc - */ - -hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); - -/** - * @brief Free memory allocated by the hcc hip memory allocation API. - * This API performs an implicit hipDeviceSynchronize() call. - * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. - * - * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess - * @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated - * with hipHostMalloc) - * - * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, - * hipMalloc3DArray, hipHostMalloc - */ -hipError_t hipFree(void* ptr); - -/** - * @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated] - * - * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess, - * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with - hipMalloc) - - * @deprecated use hipHostFree() instead - */ -DEPRECATED("use hipHostFree instead") -hipError_t hipFreeHost(void* ptr); - -/** - * @brief Free memory allocated by the hcc hip host memory allocation API - * This API performs an implicit hipDeviceSynchronize() call. - * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. - * - * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess, - * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with - * hipMalloc) - * - * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, - * hipMalloc3DArray, hipHostMalloc - */ -hipError_t hipHostFree(void* ptr); - -/** - * @brief Copy data from src to dst. 
- * - * It supports memory from host to device, - * device to host, device to device and host to host. - * The src and dst must not overlap. - * - * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). - * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the - * device where the src data is physically located. For optimal peer-to-peer copies, the copy device - * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy - * agent as the current device and src/dest as the peerDevice argument). If this is not done, the - * hipMemcpy will still work, but will perform the copy using a staging buffer on the host. - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * @param[in] kind Memory copy type - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); - -/** - * @brief Copy data from Host to Device - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - *
hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes); - -/** - * @brief Copy data from Device to Host - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); - -/** - * @brief Copy data from Device to Device - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, 
hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); - -/** - * @brief Copy data from Host to Device asynchronously - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream); - -/** - * @brief Copy data from Device to Host asynchronously - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * 
hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); - -/** - * @brief Copy data from Device to Device asynchronously - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * - * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, - * #hipErrorInvalidValue - * - * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, - * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, - * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, - * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, - * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, - * hipMemHostAlloc, hipMemHostGetDevicePointer - */ -hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, - hipStream_t stream); - - -/** - * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area - * pointed to by @p offset bytes from the start of symbol @p symbol. - * - * The memory areas may not overlap. Symbol can either be a variable that resides in global or - * constant memory space, or it can be a character string, naming a variable that resides in global - * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice - * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use - * hipErrorUnknown for now. 
- * - * @param[in] symbolName - Symbol destination on device - * @param[in] src - Data being copy from - * @param[in] sizeBytes - Data size in bytes - * @param[in] offset - Offset from start of symbol in bytes - * @param[in] kind - Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - * - * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, - * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol, - * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, - * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t sizeBytes, - size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); - - -/** - * @brief Copies the memory address of symbol @p symbolName to @p devPtr - * - * @param[in] symbolName - Symbol on device - * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol - * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound - * - * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName); - - -/** - * @brief Copies the size of symbol @p symbolName to @p size - * - * @param[in] symbolName - Symbol on device - * @param[out] size - Pointer to the size of the symbol - * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound - * - * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); - - -/** - * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area - * pointed to by @p offset bytes from the start of symbol @p symbol 
- * - * The memory areas may not overlap. Symbol can either be a variable that resides in global or - * constant memory space, or it can be a character string, naming a variable that resides in global - * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice - * hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before - * copy is complete. - * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use - * hipErrorUnknown for now. - * - * @param[in] symbolName - Symbol destination on device - * @param[in] src - Data being copy from - * @param[in] sizeBytes - Data size in bytes - * @param[in] offset - Offset from start of symbol in bytes - * @param[in] kind - Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - * - * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, - * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol, - * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, - * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t sizeBytes, - size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); - -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes, - size_t offset __dparm(0), hipMemcpyKind kind __dparm( hipMemcpyDeviceToHost )); - -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t sizeBytes, - size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); - -/** - * @brief Copy data from src to dst asynchronously. - * - * @warning If host or dest are not pinned, the memory copy will be performed synchronously. 
For - * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. - * - * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. - * For hipMemcpy, the copy is always performed by the device associated with the specified stream. - * - * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a - * attached to the device where the src data is physically located. For optimal peer-to-peer copies, - * the copy device must be able to access the src and dst pointers (by calling - * hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice - * argument. if this is not done, the hipMemcpy will still work, but will perform the copy using a - * staging buffer on the host. - * - * @param[out] dst Data being copy to - * @param[in] src Data being copy from - * @param[in] sizeBytes Data size in bytes - * @param[in] accelerator_view Accelerator view which the copy is being enqueued - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - * - * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, - * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, - * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, - * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - hipStream_t stream __dparm(0)); - -/** - * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant - * byte value value. 
- * - * @param[out] dst Data being filled - * @param[in] constant value to be set - * @param[in] sizeBytes Data size in bytes - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized - */ -hipError_t hipMemset(void* dst, int value, size_t sizeBytes); - -/** - * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant - * byte value value. - * - * @param[out] dst Data ptr to be filled - * @param[in] constant value to be set - * @param[in] sizeBytes Data size in bytes - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized - */ -hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes); - -/** - * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant - * byte value value. - * - * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the - * memset is complete. The operation can optionally be associated to a stream by passing a non-zero - * stream argument. If stream is non-zero, the operation may overlap with operations in other - * streams. - * - * @param[out] dst Pointer to device memory - * @param[in] value - Value to set for each byte of specified memory - * @param[in] sizeBytes - Size in bytes to set - * @param[in] stream - Stream identifier - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree - */ -hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); - -/** - * @brief Fills the memory area pointed to by dst with the constant value. 
- * - * @param[out] dst Pointer to device memory - * @param[in] pitch - data size in bytes - * @param[in] value - constant value to be set - * @param[in] width - * @param[in] height - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree - */ - -hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); - -/** - * @brief Fills asynchronously the memory area pointed to by dst with the constant value. - * - * @param[in] dst Pointer to device memory - * @param[in] pitch - data size in bytes - * @param[in] value - constant value to be set - * @param[in] width - * @param[in] height - * @param[in] stream - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree - */ - -hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0)); - -/** - * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. - * - * @param[in] pitchedDevPtr - * @param[in] value - constant value to be set - * @param[in] extent - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree - */ -hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ); - -/** - * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. - * - * @param[in] pitchedDevPtr - * @param[in] value - constant value to be set - * @param[in] extent - * @param[in] stream - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree - */ -hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream __dparm(0)); - -/** - * @brief Query memory info. - * Return snapshot of free memory, and total allocatable memory on the device. - * - * Returns in *free a snapshot of the current free memory. 
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue - * @warning On HCC, the free memory only accounts for memory allocated by this process and may be - *optimistic. - **/ -hipError_t hipMemGetInfo(size_t* free, size_t* total); - - -hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); - - -/** - * @brief Allocate an array on the device. - * - * @param[out] array Pointer to allocated array in device memory - * @param[in] desc Requested channel format - * @param[in] width Requested array allocation width - * @param[in] height Requested array allocation height - * @param[in] flags Requested properties of allocated array - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree - */ -hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, - size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); -hipError_t hipArrayCreate(hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); - -hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); - -hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); - -/** - * @brief Frees an array on the device. - * - * @param[in] array Pointer to array to free - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError - * - * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree - */ -hipError_t hipFreeArray(hipArray* array); - -/** - * @brief Allocate an array on the device. 
- * - * @param[out] array Pointer to allocated array in device memory - * @param[in] desc Requested channel format - * @param[in] extent Requested array allocation width, height and depth - * @param[in] flags Requested properties of allocated array - * @return #hipSuccess, #hipErrorMemoryAllocation - * - * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree - */ - -hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, - struct hipExtent extent, unsigned int flags); -/** - * @brief Copies data between host and device. - * - * @param[in] dst Destination memory address - * @param[in] dpitch Pitch of destination memory - * @param[in] src Source memory address - * @param[in] spitch Pitch of source memory - * @param[in] width Width of matrix transfer (columns in bytes) - * @param[in] height Height of matrix transfer (rows) - * @param[in] kind Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind); -hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); - -/** - * @brief Copies data between host and device. 
- * - * @param[in] dst Destination memory address - * @param[in] dpitch Pitch of destination memory - * @param[in] src Source memory address - * @param[in] spitch Pitch of source memory - * @param[in] width Width of matrix transfer (columns in bytes) - * @param[in] height Height of matrix transfer (rows) - * @param[in] kind Type of transfer - * @param[in] stream Stream to use - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); - -/** - * @brief Copies data between host and device. - * - * @param[in] dst Destination memory address - * @param[in] dpitch Pitch of destination memory - * @param[in] src Source memory address - * @param[in] spitch Pitch of source memory - * @param[in] width Width of matrix transfer (columns in bytes) - * @param[in] height Height of matrix transfer (rows) - * @param[in] kind Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, - size_t spitch, size_t width, size_t height, hipMemcpyKind kind); - -/** - * @brief Copies data between host and device. 
- * - * @param[in] dst Destination memory address - * @param[in] dpitch Pitch of destination memory - * @param[in] src Source memory address - * @param[in] spitch Pitch of source memory - * @param[in] width Width of matrix transfer (columns in bytes) - * @param[in] height Height of matrix transfer (rows) - * @param[in] kind Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, - size_t count, hipMemcpyKind kind); - -/** - * @brief Copies data between host and device. - * - * @param[in] dst Destination memory address - * @param[in] srcArray Source memory address - * @param[in] woffset Source starting X offset - * @param[in] hOffset Source starting Y offset - * @param[in] count Size in bytes to copy - * @param[in] kind Type of transfer - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, - size_t count, hipMemcpyKind kind); - -/** - * @brief Copies data between host and device. 
- * - * @param[in] dst Destination memory address - * @param[in] srcArray Source array - * @param[in] srcoffset Offset in bytes of source array - * @param[in] count Size of memory copy in bytes - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count); - -/** - * @brief Copies data between host and device. - * - * @param[in] dstArray Destination memory address - * @param[in] dstOffset Offset in bytes of destination array - * @param[in] srcHost Source host pointer - * @param[in] count Size of memory copy in bytes - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count); - -/** - * @brief Copies data between host and device. - * - * @param[in] p 3D memory copy parameters - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, - * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection - * - * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, - * hipMemcpyAsync - */ -hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); - -// doxygen end Memory -/** - * @} - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup PeerToPeer Device Memory Access - * @{ - * - * @warning PeerToPeer support is experimental. 
- * - */ - -/** - * @brief Determine if a device can access a peer's memory. - * - * @param [out] canAccessPeer Returns the peer access capability (0 or 1) - * @param [in] device - device from where memory may be accessed. - * @param [in] peerDevice - device where memory is physically located - * - * Returns "1" in @p canAccessPeer if the specified @p device is capable - * of directly accessing memory physically located on peerDevice , or "0" if not. - * - * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a - * device is not a peer of itself. - * - * @returns #hipSuccess, - * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices - */ -hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); - - -/** - * @brief Enable direct access from current device's virtual address space to memory allocations - * physically located on a peer device. - * - * Memory which already allocated on peer device will be mapped into the address space of the - * current device. In addition, all future memory allocations on peerDeviceId will be mapped into - * the address space of the current device when the memory is allocated. The peer memory remains - * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. - * - * - * @param [in] peerDeviceId - * @param [in] flags - * - * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, - * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. - */ -hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); - - -/** - * @brief Disable direct access from current device's virtual address space to memory allocations - * physically located on a peer device. - * - * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been - * enabled from the current device. 
- * - * @param [in] peerDeviceId - * - * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled - */ -hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); - -/** - * @brief Get information on memory allocations. - * - * @param [out] pbase - BAse pointer address - * @param [out] psize - Size of allocation - * @param [in] dptr- Device Pointer - * - * @returns #hipSuccess, #hipErrorInvalidDevicePointer - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); - -#ifndef USE_PEER_NON_UNIFIED -#define USE_PEER_NON_UNIFIED 1 -#endif - -#if USE_PEER_NON_UNIFIED == 1 -/** - * @brief Copies memory from one device to memory on another device. - * - * @param [out] dst - Destination device pointer. - * @param [in] dstDeviceId - Destination device - * @param [in] src - Source device pointer - * @param [in] srcDeviceId - Source device - * @param [in] sizeBytes - Size of memory copy in bytes - * - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice - */ -hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, - size_t sizeBytes); - -/** - * @brief Copies memory from one device to memory on another device. - * - * @param [out] dst - Destination device pointer. 
- * @param [in] dstDevice - Destination device - * @param [in] src - Source device pointer - * @param [in] srcDevice - Source device - * @param [in] sizeBytes - Size of memory copy in bytes - * @param [in] stream - Stream identifier - * - * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice - */ -hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, - size_t sizeBytes, hipStream_t stream __dparm(0)); -#endif - - -// doxygen end PeerToPeer -/** - * @} - */ - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Driver Initialization and Version - * @{ - * - */ - -/** - * @brief Explicitly initializes the HIP runtime. - * - * Most HIP APIs implicitly initialize the HIP runtime. - * This API provides control over the timing of the initialization. - */ -// TODO-ctx - more description on error codes. -hipError_t hipInit(unsigned int flags); - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Context Management - * @{ - */ - -/** - * @brief Create a context and set it as current/ default context - * - * @param [out] ctx - * @param [in] flags - * @param [in] associated device handle - * - * @return #hipSuccess - * - * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, - * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); - -/** - * @brief Destroy a HIP context. 
- * - * @param [in] ctx Context to destroy - * - * @returns #hipSuccess, #hipErrorInvalidValue - * - * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxDestroy(hipCtx_t ctx); - -/** - * @brief Pop the current/default context and return the popped context. - * - * @param [out] ctx - * - * @returns #hipSuccess, #hipErrorInvalidContext - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxPopCurrent(hipCtx_t* ctx); - -/** - * @brief Push the context to be set as current/ default context - * - * @param [in] ctx - * - * @returns #hipSuccess, #hipErrorInvalidContext - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxPushCurrent(hipCtx_t ctx); - -/** - * @brief Set the passed context as current/default - * - * @param [in] ctx - * - * @returns #hipSuccess, #hipErrorInvalidContext - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxSetCurrent(hipCtx_t ctx); - -/** - * @brief Get the handle of the current/ default context - * - * @param [out] ctx - * - * @returns #hipSuccess, #hipErrorInvalidContext - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetCurrent(hipCtx_t* ctx); - -/** - * @brief Get the handle of the device 
associated with current/default context - * - * @param [out] device - * - * @returns #hipSuccess, #hipErrorInvalidContext - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize - */ - -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetDevice(hipDevice_t* device); - -/** - * @brief Returns the approximate HIP api version. - * - * @param [in] ctx Context to check - * @param [out] apiVersion - * - * @return #hipSuccess - * - * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. - * This function always set *apiVersion to 4 as an approximation though HIP supports - * some features which were introduced in later CUDA SDK revisions. - * HIP apps code should not rely on the api revision number here and should - * use arch feature flags to test device capabilities or conditional compilation. - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, - * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion); - -/** - * @brief Set Cache configuration for a specific function - * - * @param [out] cacheConfiguration - * - * @return #hipSuccess - * - * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is - * ignored on those architectures. - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); - -/** - * @brief Set L1/Shared cache partition. - * - * @param [in] cacheConfiguration - * - * @return #hipSuccess - * - * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. 
This hint is - * ignored on those architectures. - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); - -/** - * @brief Set Shared memory bank configuration. - * - * @param [in] sharedMemoryConfiguration - * - * @return #hipSuccess - * - * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is - * ignored on those architectures. - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); - -/** - * @brief Get Shared memory bank configuration. - * - * @param [out] sharedMemoryConfiguration - * - * @return #hipSuccess - * - * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is - * ignored on those architectures. - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); - -/** - * @brief Blocks until the default context has completed all preceding requested tasks. - * - * @return #hipSuccess - * - * @warning This function waits for all streams on the default context to complete execution, and - * then returns. 
- * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxSynchronize(void); - -/** - * @brief Return flags used for creating default context. - * - * @param [out] flags - * - * @returns #hipSuccess - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxGetFlags(unsigned int* flags); - -/** - * @brief Enables direct access to memory allocations in a peer context. - * - * Memory which already allocated on peer device will be mapped into the address space of the - * current device. In addition, all future memory allocations on peerDeviceId will be mapped into - * the address space of the current device when the memory is allocated. The peer memory remains - * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. - * - * - * @param [in] peerCtx - * @param [in] flags - * - * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, - * #hipErrorPeerAccessAlreadyEnabled - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - * @warning PeerToPeer support is experimental. - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); - -/** - * @brief Disable direct access from current context's virtual address space to memory allocations - * physically located on a peer context.Disables direct access to memory allocations in a peer - * context and unregisters any registered allocations. 
- * - * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been - * enabled from the current device. - * - * @param [in] peerCtx - * - * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - * @warning PeerToPeer support is experimental. - */ -DEPRECATED(DEPRECATED_MSG) -hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); - -/** - * @brief Get the state of the primary context. - * - * @param [in] Device to get primary context flags for - * @param [out] Pointer to store flags - * @param [out] Pointer to store context state; 0 = inactive, 1 = active - * - * @returns #hipSuccess - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); - -/** - * @brief Release the primary context on the GPU. - * - * @param [in] Device which primary context is released - * - * @returns #hipSuccess - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on - * HIP/HCC path. - */ -hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); - -/** - * @brief Retain the primary context on the GPU. 
- * - * @param [out] Returned context handle of the new context - * @param [in] Device which primary context is released - * - * @returns #hipSuccess - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); - -/** - * @brief Resets the primary context on the GPU. - * - * @param [in] Device which primary context is reset - * - * @returns #hipSuccess - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); - -/** - * @brief Set flags for the primary context. - * - * @param [in] Device for which the primary context flags are set - * @param [in] New flags for the device - * - * @returns #hipSuccess, #hipErrorContextAlreadyInUse - * - * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, - * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice - */ -hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); - -// doxygen end Context Management -/** - * @} - */ - -/** - * @brief Returns a handle to a compute device - * @param [out] device - * @param [in] ordinal - * - * @returns #hipSuccess, #hipErrorInavlidDevice - */ -hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); - -/** - * @brief Returns the compute capability of the device - * @param [out] major - * @param [out] minor - * @param [in] device - * - * @returns #hipSuccess, #hipErrorInavlidDevice - */ -hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); - -/** - * @brief Returns an identifer string for the device. 
- * @param [out] name - * @param [in] len - * @param [in] device - * - * @returns #hipSuccess, #hipErrorInavlidDevice - */ -hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); - -/** - * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. - * @param [out] pciBusId - * @param [in] len - * @param [in] device - * - * @returns #hipSuccess, #hipErrorInavlidDevice - */ -hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); - - -/** - * @brief Returns a handle to a compute device. - * @param [out] device handle - * @param [in] PCI Bus ID - * - * @returns #hipSuccess, #hipErrorInavlidDevice, #hipErrorInvalidValue - */ -hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); - - -/** - * @brief Returns the total amount of memory on the device. - * @param [out] bytes - * @param [in] device - * - * @returns #hipSuccess, #hipErrorInavlidDevice - */ -hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); - -/** - * @brief Returns the approximate HIP driver version. - * - * @param [out] driverVersion - * - * @returns #hipSuccess, #hipErrorInavlidValue - * - * @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision. - * This function always set *driverVersion to 4 as an approximation though HIP supports - * some features which were introduced in later CUDA SDK revisions. - * HIP apps code should not rely on the driver revision number here and should - * use arch feature flags to test device capabilities or conditional compilation. - * - * @see hipRuntimeGetVersion - */ -hipError_t hipDriverGetVersion(int* driverVersion); - -/** - * @brief Returns the approximate HIP Runtime version. - * - * @param [out] runtimeVersion - * - * @returns #hipSuccess, #hipErrorInavlidValue - * - * @warning On HIP/HCC path this function returns HIP runtime patch version however on - * HIP/NVCC path this function return CUDA runtime version. 
- * - * @see hipDriverGetVersion - */ -hipError_t hipRuntimeGetVersion(int* runtimeVersion); - -/** - * @brief Loads code object from file into a hipModule_t - * - * @param [in] fname - * @param [out] module - * - * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, - * hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized - * - * - */ -hipError_t hipModuleLoad(hipModule_t* module, const char* fname); - -/** - * @brief Frees the module - * - * @param [in] module - * - * @returns hipSuccess, hipInvalidValue - * module is freed and the code objects associated with it are destroyed - * - */ - -hipError_t hipModuleUnload(hipModule_t module); - -/** - * @brief Function with kname will be extracted if present in module - * - * @param [in] module - * @param [in] kname - * @param [out] function - * - * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, - * hipErrorNotFound, - */ -hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); - -/** - * @bried Find out attributes for a given function. 
- * - * @param [out] attr - * @param [in] func - * - * @returns hipSuccess, hipErrorInvalidDeviceFunction - */ - -hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); - -/** - * @brief returns device memory pointer and size of the kernel present in the module with symbol @p - * name - * - * @param [out] dptr - * @param [out] bytes - * @param [in] hmod - * @param [in] name - * - * @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized - */ -hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name); - -hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name); - -hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); -/** - * @brief builds module from code object which resides in host memory. Image is pointer to that - * location. - * - * @param [in] image - * @param [out] module - * - * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized - */ -hipError_t hipModuleLoadData(hipModule_t* module, const void* image); - -/** - * @brief builds module from code object which resides in host memory. Image is pointer to that - * location. Options are not used. hipModuleLoadData is called. - * - * @param [in] image - * @param [out] module - * @param [in] number of options - * @param [in] options for JIT - * @param [in] option values for JIT - * - * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized - */ -hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, - hipJitOption* options, void** optionValues); - -/** - * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed - * to kernelparams or extra - * - * @param [in] f Kernel to launch. - * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. 
- * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. - * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. - * @param [in] blockDimX X block dimensions specified in work-items - * @param [in] blockDimY Y grid dimension specified in work-items - * @param [in] blockDimZ Z grid dimension specified in work-items - * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The - * kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th - * default stream is used with associated synchronization rules. - * @param [in] kernelParams - * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and - * must be in the memory layout and alignment expected by the kernel. - * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please - * refer to hip_porting_driver_api.md for sample usage. - */ -hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, - unsigned int blockDimY, unsigned int blockDimZ, - unsigned int sharedMemBytes, hipStream_t stream, - void** kernelParams, void** extra); - -// doxygen end Version Management -/** - * @} - */ - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Profiler Control - * @{ - * - * - * @warning The cudaProfilerInitialize API format for "configFile" is not supported. - * - */ - - -// TODO - expand descriptions: -/** - * @brief Start recording of profiling information - * When using this API, start the profiler with profiling disabled. 
(--startdisabled) - * @warning : hipProfilerStart API is under development. - */ -hipError_t hipProfilerStart(); - - -/** - * @brief Stop recording of profiling information. - * When using this API, start the profiler with profiling disabled. (--startdisabled) - * @warning : hipProfilerStop API is under development. - */ -hipError_t hipProfilerStop(); - - -/** - * @} - */ - -// TODO: implement IPC apis - -/** - * @brief Gets an interprocess memory handle for an existing device memory - * allocation - * - * Takes a pointer to the base of an existing device memory allocation created - * with hipMalloc and exports it for use in another process. This is a - * lightweight operation and may be called multiple times on an allocation - * without adverse effects. - * - * If a region of memory is freed with hipFree and a subsequent call - * to hipMalloc returns memory with the same device address, - * hipIpcGetMemHandle will return a unique handle for the - * new memory. - * - * @param handle - Pointer to user allocated hipIpcMemHandle to return - * the handle in. - * @param devPtr - Base pointer to previously allocated device memory - * - * @returns - * hipSuccess, - * hipErrorInvalidResourceHandle, - * hipErrorMemoryAllocation, - * hipErrorMapBufferObjectFailed, - * - */ -hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); - -/** - * @brief Opens an interprocess memory handle exported from another process - * and returns a device pointer usable in the local process. - * - * Maps memory exported from another process with hipIpcGetMemHandle into - * the current device address space. For contexts on different devices - * hipIpcOpenMemHandle can attempt to enable peer access between the - * devices as if the user called hipDeviceEnablePeerAccess. This behavior is - * controlled by the hipIpcMemLazyEnablePeerAccess flag. - * hipDeviceCanAccessPeer can determine if a mapping is possible. 
- * - * Contexts that may open hipIpcMemHandles are restricted in the following way. - * hipIpcMemHandles from each device in a given process may only be opened - * by one context per device per other process. - * - * Memory returned from hipIpcOpenMemHandle must be freed with - * hipIpcCloseMemHandle. - * - * Calling hipFree on an exported memory region before calling - * hipIpcCloseMemHandle in the importing context will result in undefined - * behavior. - * - * @param devPtr - Returned device pointer - * @param handle - hipIpcMemHandle to open - * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess - * - * @returns - * hipSuccess, - * hipErrorMapBufferObjectFailed, - * hipErrorInvalidResourceHandle, - * hipErrorTooManyPeers - * - * @note No guarantees are made about the address returned in @p *devPtr. - * In particular, multiple processes may not receive the same address for the same @p handle. - * - */ -hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); - -/** - * @brief Close memory mapped with hipIpcOpenMemHandle - * - * Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation - * in the exporting process as well as imported mappings in other processes - * will be unaffected. - * - * Any resources used to enable peer access will be freed if this is the - * last mapping using them. 
- * - * @param devPtr - Device pointer returned by hipIpcOpenMemHandle - * - * @returns - * hipSuccess, - * hipErrorMapBufferObjectFailed, - * hipErrorInvalidResourceHandle, - * - */ -hipError_t hipIpcCloseMemHandle(void* devPtr); - - -// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); -// hipError_t hipIpcCloseMemHandle(void *devPtr); -// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); -// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Clang Launch API to support the triple-chevron syntax - * @{ - */ - -/** - * @brief Configure a kernel launch. - * - * @param [in] gridDim grid dimension specified as multiple of blockDim. - * @param [in] blockDim block dimensions specified in work-items - * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The - * kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the - * default stream is used with associated synchronization rules. - * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - */ -hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0)); - - -/** - * @brief Set a kernel argument. - * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - * @param [in] arg Pointer the argument in host memory. - * @param [in] size Size of the argument. - * @param [in] offset Offset of the argument on the argument stack. - * - */ -hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); - - -/** - * @brief Launch a kernel. 
- * - * @param [in] func Kernel to launch. - * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - */ -hipError_t hipLaunchByPtr(const void* func); - - - -/** - * @} - */ - - -#ifdef __cplusplus -} /* extern "c" */ -#endif - -#include - -#ifdef __cplusplus -extern "C" { -#endif -/** - * Callback/Activity API - */ -hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg); -hipError_t hipRemoveApiCallback(uint32_t id); -hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg); -hipError_t hipRemoveActivityCallback(uint32_t id); -static inline const char* hipApiName(const uint32_t& id) { return hip_api_name(id); } -const char* hipKernelNameRef(hipFunction_t f); -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#ifdef __cplusplus - -hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t size = UINT_MAX); - -hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, - const void* devPtr, const struct hipChannelFormatDesc* desc, - size_t size, textureReference* tex); - -/* - * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture - *reference tex. - * - * @p desc describes how the memory is interpreted when fetching values from the texture. The @p - *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any - *memory previously bound to tex is unbound. 
- * - * @param[in] offset - Offset in bytes - * @param[out] tex - texture to bind - * @param[in] devPtr - Memory area on device - * @param[in] desc - Channel format - * @param[in] size - Size of the memory area pointed to by devPtr - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - **/ -template -hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, - const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { - return ihipBindTextureImpl(dim, readMode, offset, devPtr, &desc, size, &tex); -} - -/* - * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture - *reference tex. - * - * @p desc describes how the memory is interpreted when fetching values from the texture. The @p - *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any - *memory previously bound to tex is unbound. - * - * @param[in] offset - Offset in bytes - * @param[in] tex - texture to bind - * @param[in] devPtr - Memory area on device - * @param[in] size - Size of the memory area pointed to by devPtr - * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown - **/ -template -hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, - size_t size = UINT_MAX) { - return ihipBindTextureImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); -} - -// C API -hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t width, size_t height, - size_t pitch); - -hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, - const void* devPtr, const struct hipChannelFormatDesc* desc, - size_t width, size_t height, textureReference* tex); - -template -hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, - const void* devPtr, size_t width, size_t height, size_t pitch) { - return 
ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height, - &tex); -} - -template -hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, - const void* devPtr, const struct hipChannelFormatDesc& desc, - size_t width, size_t height, size_t pitch) { - return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex); -} - -// C API -hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, - const hipChannelFormatDesc* desc); - -hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, - hipArray_const_t array, - const struct hipChannelFormatDesc& desc, - textureReference* tex); - -template -hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { - return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, &tex); -} - -template -hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, - const struct hipChannelFormatDesc& desc) { - return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex); -} - -template -inline static hipError_t hipBindTextureToArray(struct texture *tex, - hipArray_const_t array, - const struct hipChannelFormatDesc* desc) { - return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex); -} - -// C API -hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, - hipMipmappedArray_const_t mipmappedArray, - const hipChannelFormatDesc* desc); - -template -hipError_t hipBindTextureToMipmappedArray(const texture& tex, - hipMipmappedArray_const_t mipmappedArray) { - return hipSuccess; -} - -template -hipError_t hipBindTextureToMipmappedArray(const texture& tex, - hipMipmappedArray_const_t mipmappedArray, - const hipChannelFormatDesc& desc) { - return hipSuccess; -} - -/* - * @brief Unbinds the textuer bound to @p tex - * - * @param[in] tex - texture to unbind - * - * @return #hipSuccess - **/ -hipError_t hipUnbindTexture(const textureReference* tex); - -extern 
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); - -template -hipError_t hipUnbindTexture(struct texture& tex) { - return ihipUnbindTextureImpl(tex.textureObject); -} - -hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); -hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); -hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); - -hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, - const hipTextureDesc* pTexDesc, - const hipResourceViewDesc* pResViewDesc); - -hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); - -hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, - hipTextureObject_t textureObject); -hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, - hipTextureObject_t textureObject); -hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, - hipTextureObject_t textureObject); -hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags); - -hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am); - -hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm); - -hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags); - -hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents); - -hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, - size_t size); - -hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, - hipDeviceptr_t devPtr, size_t pitch); - -hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); - -hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); - -// doxygen end Texture -/** - * @} - */ - - -#endif - - -/** - 
*------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup HCC_Specific HCC-Specific Accessors - * @{ - * - * The following calls are only supported when compiler HIP with HCC. - * To produce portable code, use of these calls must be guarded #ifdef checks: - * @code - * #ifdef __HCC__ - * hc::accelerator acc; - hipError_t err = hipHccGetAccelerator(deviceId, &acc) - * #endif - * @endcode - * - */ - -// end-group HCC_Specific -/** - * @} - */ - - -// doxygen end HIP API -/** - * @} - */ - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h b/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h deleted file mode 100644 index f74c01d70..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_surface_types.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -Copyright (c) 2015- present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/hip_surface_types.h - * @brief Defines surface types for HIP runtime. - */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H - -#include - -/** - * An opaque value that represents a hip surface object - */ -typedef unsigned long long hipSurfaceObject_t; - -/** - * hip surface reference - */ -struct surfaceReference { - hipSurfaceObject_t surfaceObject; -}; - -/** - * hip surface boundary modes - */ -enum hipSurfaceBoundaryMode { - hipBoundaryModeZero = 0, - hipBoundaryModeTrap = 1, - hipBoundaryModeClamp = 2 -}; - -#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h b/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h deleted file mode 100644 index 0a68b507e..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_texture_types.h +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/hip_texture_types.h - * @brief Defines the different newt vector types for HIP runtime. - */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H - -/******************************************************************************* - * * - * * - * * - *******************************************************************************/ -#include -//#include -#include -#include - -#if __cplusplus - -/******************************************************************************* - * * - * * - * * - *******************************************************************************/ - -template -struct texture : public textureReference { - texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint, - enum hipTextureAddressMode aMode = hipAddressModeClamp) { - normalized = norm; - filterMode = fMode; - addressMode[0] = aMode; - addressMode[1] = aMode; - addressMode[2] = aMode; - channelDesc = hipCreateChannelDesc(); - sRGB = 0; - } - - texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, - struct hipChannelFormatDesc desc) { - normalized = norm; - filterMode = fMode; - addressMode[0] = aMode; - addressMode[1] = aMode; - addressMode[2] = aMode; - channelDesc = desc; - sRGB = 0; - } -}; - -#endif /* __cplusplus */ - -#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */ diff --git a/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h 
b/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h deleted file mode 100644 index 1df6385fa..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/hip_vector_types.h +++ /dev/null @@ -1,880 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/hip_vector_types.h - * @brief Defines the different newt vector types for HIP runtime. - */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H - -#if defined(__HCC__) && (__hcc_workweek__ < 16032) -#error("This version of HIP requires a newer version of HCC."); -#endif - -#include "hip/hcc_detail/host_defines.h" - -#if !defined(_MSC_VER) || __clang__ -#if defined(__clang__) - #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) -#elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. 
- #define __ROUND_UP_TO_NEXT_POT__(x) \ - (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x)))))) - #define __NATIVE_VECTOR__(n, T) \ - __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) -#endif - -#if defined(__cplusplus) - #include - - template struct HIP_vector_base; - - template - struct HIP_vector_base { - typedef T Native_vec_ __NATIVE_VECTOR__(1, T); - - union { - Native_vec_ data; - struct { - T x; - }; - }; - }; - - template - struct HIP_vector_base { - typedef T Native_vec_ __NATIVE_VECTOR__(2, T); - - union { - Native_vec_ data; - struct { - T x; - T y; - }; - }; - }; - - template - struct HIP_vector_base { - typedef T Native_vec_ __NATIVE_VECTOR__(3, T); - - union { - Native_vec_ data; - struct { - T x; - T y; - T z; - }; - }; - }; - - template - struct HIP_vector_base { - typedef T Native_vec_ __NATIVE_VECTOR__(4, T); - - union { - Native_vec_ data; - struct { - T x; - T y; - T z; - T w; - }; - }; - }; - - template - struct HIP_vector_type : public HIP_vector_base { - using HIP_vector_base::data; - using typename HIP_vector_base::Native_vec_; - - __host__ __device__ - HIP_vector_type() = default; - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type(U x) noexcept - { - for (auto i = 0u; i != rank; ++i) data[i] = x; - } - template< // TODO: constrain based on type as well. - typename... Us, - typename std::enable_if< - (rank > 1) && sizeof...(Us) == rank>::type* = nullptr> - __host__ __device__ - HIP_vector_type(Us... 
xs) noexcept { data = Native_vec_{static_cast(xs)...}; } - __host__ __device__ - HIP_vector_type(const HIP_vector_type&) = default; - __host__ __device__ - HIP_vector_type(HIP_vector_type&&) = default; - __host__ __device__ - ~HIP_vector_type() = default; - - __host__ __device__ - HIP_vector_type& operator=(const HIP_vector_type&) = default; - __host__ __device__ - HIP_vector_type& operator=(HIP_vector_type&&) = default; - - // Operators - __host__ __device__ - HIP_vector_type& operator++() noexcept - { - return *this += HIP_vector_type{1}; - } - __host__ __device__ - HIP_vector_type operator++(int) noexcept - { - auto tmp(*this); - ++*this; - return tmp; - } - __host__ __device__ - HIP_vector_type& operator--() noexcept - { - return *this -= HIP_vector_type{1}; - } - __host__ __device__ - HIP_vector_type operator--(int) noexcept - { - auto tmp(*this); - --*this; - return tmp; - } - __host__ __device__ - HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept - { - data += x.data; - return *this; - } - __host__ __device__ - HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept - { - data -= x.data; - return *this; - } - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator-=(U x) noexcept - { - return *this -= HIP_vector_type{x}; - } - __host__ __device__ - HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept - { - data *= x.data; - return *this; - } - __host__ __device__ - HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept - { - data /= x.data; - return *this; - } - - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type operator-() noexcept - { - auto tmp(*this); - tmp.data = -tmp.data; - return tmp; - } - - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type operator~() noexcept - { - HIP_vector_type r{*this}; - 
r.data = ~r.data; - return r; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept - { - data %= x.data; - return *this; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept - { - data ^= x.data; - return *this; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept - { - data |= x.data; - return *this; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept - { - data &= x.data; - return *this; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept - { - data >>= x.data; - return *this; - } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __host__ __device__ - HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept - { - data <<= x.data; - return *this; - } - }; - - - template - __host__ __device__ - inline - HIP_vector_type operator+( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} += y; - } - template - __host__ __device__ - inline - HIP_vector_type operator+( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} += y; - } - template - __host__ __device__ - inline - HIP_vector_type operator+( - U x, const HIP_vector_type& y) noexcept - { - return y + x; - } - - template - __host__ __device__ - inline - HIP_vector_type operator-( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} -= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator-( 
- const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} -= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator-( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} -= y; - } - - template - __host__ __device__ - inline - HIP_vector_type operator*( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} *= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator*( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} *= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator*( - U x, const HIP_vector_type& y) noexcept - { - return y * x; - } - - template - __host__ __device__ - inline - HIP_vector_type operator/( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} /= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator/( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} /= y; - } - template - __host__ __device__ - inline - HIP_vector_type operator/( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} /= y; - } - - template - __host__ __device__ - inline - bool operator==( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - auto tmp = x.data == y.data; - for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; - return true; - } - template - __host__ __device__ - inline - bool operator==(const HIP_vector_type& x, U y) noexcept - { - return x == HIP_vector_type{y}; - } - template - __host__ __device__ - inline - bool operator==(U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} == y; - } - - template - __host__ __device__ - inline - bool operator!=( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return !(x == y); - } - template - __host__ __device__ - inline - bool operator!=(const HIP_vector_type& x, U y) noexcept - { - return !(x == 
y); - } - template - __host__ __device__ - inline - bool operator!=(U x, const HIP_vector_type& y) noexcept - { - return !(x == y); - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator%( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} %= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator%( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} %= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator%( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} %= y; - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator^( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} ^= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator^( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} ^= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator^( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} ^= y; - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator|( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} |= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator|( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} |= y; - } - template< - typename T, - unsigned int n, - typename U, - typename 
std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator|( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} |= y; - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator&( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} &= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator&( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} &= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator&( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} &= y; - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator>>( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} >>= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator>>( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} >>= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator>>( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} >>= y; - } - - template< - typename T, - unsigned int n, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator<<( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} <<= y; - } - template< - typename T, - unsigned int n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator<<( - const HIP_vector_type& x, U y) noexcept - { - return HIP_vector_type{x} <<= y; - } - template< - typename T, - unsigned int 
n, - typename U, - typename std::enable_if{}>* = nullptr> - inline - HIP_vector_type operator<<( - U x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{x} <<= y; - } - - #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ - using CUDA_name##1 = HIP_vector_type;\ - using CUDA_name##2 = HIP_vector_type;\ - using CUDA_name##3 = HIP_vector_type;\ - using CUDA_name##4 = HIP_vector_type; -#else - #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ - typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\ - typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\ - typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\ - typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\ - typedef struct {\ - union {\ - CUDA_name##_impl1 data;\ - struct {\ - T x;\ - };\ - };\ - } CUDA_name##1;\ - typedef struct {\ - union {\ - CUDA_name##_impl2 data;\ - struct {\ - T x;\ - T y;\ - };\ - };\ - } CUDA_name##2;\ - typedef struct {\ - union {\ - CUDA_name##_impl3 data;\ - struct {\ - T x;\ - T y;\ - T z;\ - };\ - };\ - } CUDA_name##3;\ - typedef struct {\ - union {\ - CUDA_name##_impl4 data;\ - struct {\ - T x;\ - T y;\ - T z;\ - T w;\ - };\ - };\ - } CUDA_name##4; -#endif - -__MAKE_VECTOR_TYPE__(uchar, unsigned char); -__MAKE_VECTOR_TYPE__(char, char); -__MAKE_VECTOR_TYPE__(ushort, unsigned short); -__MAKE_VECTOR_TYPE__(short, short); -__MAKE_VECTOR_TYPE__(uint, unsigned int); -__MAKE_VECTOR_TYPE__(int, int); -__MAKE_VECTOR_TYPE__(ulong, unsigned long); -__MAKE_VECTOR_TYPE__(long, long); -__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long); -__MAKE_VECTOR_TYPE__(longlong, long long); -__MAKE_VECTOR_TYPE__(float, float); -__MAKE_VECTOR_TYPE__(double, double); - -#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ - __device__ __host__ \ - static \ - inline \ - type make_##type(comp x) { type r = {x}; return r; } - -#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ - __device__ __host__ \ - static \ - inline \ - type make_##type(comp x, comp y) { type r = {x, y}; return r; } - -#define 
DECLOP_MAKE_THREE_COMPONENT(comp, type) \ - __device__ __host__ \ - static \ - inline \ - type make_##type(comp x, comp y, comp z) { type r = {x, y, z}; return r; } - -#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ - __device__ __host__ \ - static \ - inline \ - type make_##type(comp x, comp y, comp z, comp w) { \ - type r = {x, y, z, w}; \ - return r; \ - } - -DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); -DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2); -DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3); -DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4); - -DECLOP_MAKE_ONE_COMPONENT(signed char, char1); -DECLOP_MAKE_TWO_COMPONENT(signed char, char2); -DECLOP_MAKE_THREE_COMPONENT(signed char, char3); -DECLOP_MAKE_FOUR_COMPONENT(signed char, char4); - -DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1); -DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2); -DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3); -DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4); - -DECLOP_MAKE_ONE_COMPONENT(signed short, short1); -DECLOP_MAKE_TWO_COMPONENT(signed short, short2); -DECLOP_MAKE_THREE_COMPONENT(signed short, short3); -DECLOP_MAKE_FOUR_COMPONENT(signed short, short4); - -DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1); -DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2); -DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3); -DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4); - -DECLOP_MAKE_ONE_COMPONENT(signed int, int1); -DECLOP_MAKE_TWO_COMPONENT(signed int, int2); -DECLOP_MAKE_THREE_COMPONENT(signed int, int3); -DECLOP_MAKE_FOUR_COMPONENT(signed int, int4); - -DECLOP_MAKE_ONE_COMPONENT(float, float1); -DECLOP_MAKE_TWO_COMPONENT(float, float2); -DECLOP_MAKE_THREE_COMPONENT(float, float3); -DECLOP_MAKE_FOUR_COMPONENT(float, float4); - -DECLOP_MAKE_ONE_COMPONENT(double, double1); -DECLOP_MAKE_TWO_COMPONENT(double, double2); -DECLOP_MAKE_THREE_COMPONENT(double, double3); -DECLOP_MAKE_FOUR_COMPONENT(double, double4); - -DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1); 
-DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2); -DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3); -DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4); - -DECLOP_MAKE_ONE_COMPONENT(signed long, long1); -DECLOP_MAKE_TWO_COMPONENT(signed long, long2); -DECLOP_MAKE_THREE_COMPONENT(signed long, long3); -DECLOP_MAKE_FOUR_COMPONENT(signed long, long4); - -DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1); -DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2); -DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3); -DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4); - -DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); -DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); -DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); -DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); -#else // defined(_MSC_VER) -#include -#include -#include -#include - -typedef union { char data; } char1; -typedef union { char data[2]; } char2; -typedef union { char data[4]; } char4; -typedef union { char4 data; } char3; -typedef union { __m64 data; } char8; -typedef union { __m128i data; } char16; - -typedef union { unsigned char data; } uchar1; -typedef union { unsigned char data[2]; } uchar2; -typedef union { unsigned char data[4]; } uchar4; -typedef union { uchar4 data; } uchar3; -typedef union { __m64 data; } uchar8; -typedef union { __m128i data; } uchar16; - -typedef union { short data; } short1; -typedef union { short data[2]; } short2; -typedef union { __m64 data; } short4; -typedef union { short4 data; } short3; -typedef union { __m128i data; } short8; -typedef union { __m128i data[2]; } short16; - -typedef union { unsigned short data; } ushort1; -typedef union { unsigned short data[2]; } ushort2; -typedef union { __m64 data; } ushort4; -typedef union { ushort4 data; } ushort3; -typedef union { __m128i data; } ushort8; -typedef union { __m128i data[2]; } ushort16; - -typedef union { int data; } int1; -typedef union { __m64 data; } int2; 
-typedef union { __m128i data; } int4; -typedef union { int4 data; } int3; -typedef union { __m128i data[2]; } int8; -typedef union { __m128i data[4];} int16; - -typedef union { unsigned int data; } uint1; -typedef union { __m64 data; } uint2; -typedef union { __m128i data; } uint4; -typedef union { uint4 data; } uint3; -typedef union { __m128i data[2]; } uint8; -typedef union { __m128i data[4]; } uint16; - -#if !defined(_WIN64) -typedef union { int data; } long1; -typedef union { __m64 data; } long2; -typedef union { __m128i data; } long4; -typedef union { long4 data; } long3; -typedef union { __m128i data[2]; } long8; -typedef union { __m128i data[4]; } long16; - -typedef union { unsigned int data; } ulong1; -typedef union { __m64 data; } ulong2; -typedef union { __m128i data; } ulong4; -typedef union { ulong4 data; } ulong3; -typedef union { __m128i data[2]; } ulong8; -typedef union { __m128i data[4]; } ulong16; -#else // defined(_WIN64) -typedef union { __m64 data; } long1; -typedef union { __m128i data; } long2; -typedef union { __m128i data[2]; } long4; -typedef union { long4 data; } long3; -typedef union { __m128i data[4]; } long8; -typedef union { __m128i data[8]; } long16; - -typedef union { __m64 data; } ulong1; -typedef union { __m128i data; } ulong2; -typedef union { __m128i data[2]; } ulong4; -typedef union { ulong4 data; } ulong3; -typedef union { __m128i data[4]; } ulong8; -typedef union { __m128i data[8]; } ulong16; -#endif // defined(_WIN64) - -typedef union { __m64 data; } longlong1; -typedef union { __m128i data; } longlong2; -typedef union { __m128i data[2]; } longlong4; -typedef union { longlong4 data; } longlong3; -typedef union { __m128i data[4]; } longlong8; -typedef union { __m128i data[8]; } longlong16; - -typedef union { __m64 data; } ulonglong1; -typedef union { __m128i data; } ulonglong2; -typedef union { __m128i data[2]; } ulonglong4; -typedef union { ulonglong4 data; } ulonglong3; -typedef union { __m128i data[4]; } ulonglong8; 
-typedef union { __m128i data[8]; } ulonglong16; - -typedef union { float data; } float1; -typedef union { __m64 data; } float2; -typedef union { __m128 data; } float4; -typedef union { float4 data; } float3; -typedef union { __m256 data; } float8; -typedef union { __m256 data[2]; } float16; - -typedef union { double data; } double1; -typedef union { __m128d data; } double2; -typedef union { __m256d data; } double4; -typedef union { double4 data; } double3; -typedef union { __m256d data[2]; } double8; -typedef union { __m256d data[4]; } double16; - -#endif // defined(_MSC_VER) -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/host_defines.h b/src/utils/amd_hip/hip/hcc_detail/host_defines.h deleted file mode 100644 index 5d1c3d8f6..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/host_defines.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/** - * @file hcc_detail/host_defines.h - * @brief TODO-doc - */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H - - -// Add guard to Generic Grid Launch method -#ifndef GENERIC_GRID_LAUNCH -#define GENERIC_GRID_LAUNCH 1 -#endif - -#ifdef __HCC__ -/** - * Function and kernel markers - */ -#define __host__ __attribute__((cpu)) -#define __device__ __attribute__((hc)) - -#if GENERIC_GRID_LAUNCH == 0 -#define __global__ __attribute__((hc_grid_launch)) __attribute__((used)) -#else -#if __hcc_workweek__ >= 17481 -#define __global__ __attribute__((annotate("__HIP_global_function__"), cpu, hc, used)) -#else -#define __global__ __attribute__((hc, used)) -#endif -#endif // GENERIC_GRID_LAUNCH - -#define __noinline__ __attribute__((noinline)) -#define __forceinline__ inline __attribute__((always_inline)) - - -/* - * Variable Type Qualifiers: - */ -// _restrict is supported by the compiler -#define __shared__ tile_static -#define __constant__ __attribute__((hc)) - -#elif defined(__clang__) && defined(__HIP__) - -#define __host__ __attribute__((host)) -#define __device__ __attribute__((device)) -#define __global__ __attribute__((global)) -#define __shared__ __attribute__((shared)) -#define __constant__ __attribute__((constant)) - -#define __noinline__ __attribute__((noinline)) -#define __forceinline__ inline __attribute__((always_inline)) - -#else - -// Non-HCC compiler -/** - * Function and kernel markers - */ -#define __host__ -#define __device__ - -#define __global__ - -#define __noinline__ -#define __forceinline__ - -#define __shared__ -#define __constant__ - -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h b/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h deleted file mode 100644 index dc6fd05c5..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/llvm_intrinsics.h +++ /dev/null @@ -1,70 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hcc_detail/llvm_intrinsics.h - * @brief Contains declarations for wrapper functions for llvm intrinsics - * like llvm.amdgcn.s.barrier. 
- */ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H - -#include "hip/hcc_detail/host_defines.h" - -__device__ -__attribute__((convergent)) -ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); - -__device__ -unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); - -__device__ -unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32"); - -__device__ -uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); - -__device__ -int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute"); - -__device__ -int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute"); - -__device__ -int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle"); - -__device__ -int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, - bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32"); - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp b/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp deleted file mode 100644 index 8726b60dc..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/macro_based_grid_launch.hpp +++ /dev/null @@ -1,798 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "concepts.hpp" -#include "helpers.hpp" - -#include "hc.hpp" -#include "hip/hip_hcc.h" -#include "hip_runtime.h" - -#include -#include -#include -#include -#include - -namespace hip_impl { -namespace { -struct New_grid_launch_tag {}; -struct Old_grid_launch_tag {}; - -template -class RAII_guard { - D dtor_; - - public: - RAII_guard() = default; - - RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} { ctor(); } - - RAII_guard(const RAII_guard&) = default; - RAII_guard(RAII_guard&&) = default; - - RAII_guard& operator=(const RAII_guard&) = default; - RAII_guard& operator=(RAII_guard&&) = default; - - ~RAII_guard() { dtor_(); } -}; - -template -RAII_guard make_RAII_guard(const C& ctor, D dtor) { - return RAII_guard{ctor, std::move(dtor)}; -} - -template -using is_new_grid_launch_t = typename std::conditional{}, New_grid_launch_tag, - Old_grid_launch_tag>::type; -} // namespace - -// TODO: - dispatch rank should be derived from the domain dimensions passed -// in, and not always assumed to be 3; - -template -requires(Domain == - {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, dim3 num_blocks, - dim3 dim_blocks, int group_mem_bytes, - const hc::accelerator_view& acc_v, K k) { - const auto d = - hc::extent<3>{num_blocks.z * dim_blocks.z, num_blocks.y * dim_blocks.y, - num_blocks.x * dim_blocks.x} - .tile_with_dynamic(dim_blocks.z, dim_blocks.y, dim_blocks.x, group_mem_bytes); - - try { - hc::parallel_for_each(acc_v, d, k); - } catch (std::exception& ex) { - std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; - throw; - } -} - -// TODO: these are workarounds, they should be removed. 
- -hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); -void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t); -void unlock_stream_hip_(hipStream_t, void*, const char*, hc::accelerator_view*); - -template -requires(Domain == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, - dim3 num_blocks, dim3 dim_blocks, - int group_mem_bytes, - hipStream_t stream, - const char* kernel_name, K k) { - void* lck_stream = nullptr; - auto acc_v = lock_stream_hip_(stream, lck_stream); - auto stream_guard = - make_RAII_guard(std::bind(print_prelaunch_trace_, kernel_name, num_blocks, dim_blocks, - group_mem_bytes, stream), - std::bind(unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); - - try { - grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), - group_mem_bytes, acc_v, std::move(k)); - } catch (std::exception& ex) { - std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; - throw; - } -} - -template -requires(Domain == - {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(Old_grid_launch_tag, - dim3 num_blocks, dim3 dim_blocks, - int group_mem_bytes, - hipStream_t stream, K k) { - grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), - group_mem_bytes, std::move(stream), std::move(k)); -} - -template -requires(Domain == {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_( - Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, - const char* kernel_name, K k) { - grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), - group_mem_bytes, std::move(stream), kernel_name, std::move(k)); -} - -template -requires(Domain == {Ts...}) inline std::enable_if_t< - !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, - int group_mem_bytes, hipStream_t stream, - const char* kernel_name, K k) { - 
grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), - std::move(dim_blocks), group_mem_bytes, std::move(stream), kernel_name, - std::move(k)); -} - -template -requires(Domain == {Ts...}) inline std::enable_if_t< - !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, - int group_mem_bytes, hipStream_t stream, K k) { - grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), - std::move(dim_blocks), group_mem_bytes, std::move(stream), std::move(k)); -} - -// TODO: these are temporary and purposefully noisy and disruptive. -#define make_kernel_name_hip(k, n) \ - HIP_kernel_functor_name_begin##_##k##_##HIP_kernel_functor_name_end##_##n - -#define make_kernel_functor_hip_30(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22, p23, p24, p25, p26, p27) \ - struct make_kernel_name_hip(function_name, 28) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - std::decay_t _p23_; \ - std::decay_t _p24_; \ - std::decay_t _p25_; \ - std::decay_t _p26_; \ - std::decay_t _p27_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_, _p23_, _p24_, _p25_, _p26_, _p27_); \ - } \ - } -#define make_kernel_functor_hip_29(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ 
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22, p23, p24, p25, p26) \ - struct make_kernel_name_hip(function_name, 27) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - std::decay_t _p23_; \ - std::decay_t _p24_; \ - std::decay_t _p25_; \ - std::decay_t _p26_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_, _p23_, _p24_, _p25_, _p26_); \ - } \ - } -#define make_kernel_functor_hip_28(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22, p23, p24, p25) \ - struct make_kernel_name_hip(function_name, 26) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - std::decay_t _p23_; \ - std::decay_t _p24_; \ - std::decay_t _p25_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - 
kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_, _p23_, _p24_, _p25_); \ - } \ - } -#define make_kernel_functor_hip_27(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22, p23, p24) \ - struct make_kernel_name_hip(function_name, 25) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - std::decay_t _p23_; \ - std::decay_t _p24_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_, _p23_, _p24_); \ - } \ - } -#define make_kernel_functor_hip_26(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22, p23) \ - struct make_kernel_name_hip(function_name, 24) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t 
_p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - std::decay_t _p23_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_, _p23_); \ - } \ - } -#define make_kernel_functor_hip_25(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ - p22) \ - struct make_kernel_name_hip(function_name, 23) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - std::decay_t _p22_; \ - __attribute__((used, flatten)) void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ - _p22_); \ - } \ - } -#define make_kernel_functor_hip_24(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21) \ - struct make_kernel_name_hip(function_name, 22) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - 
std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - std::decay_t _p21_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_); \ - } \ - } -#define make_kernel_functor_hip_23(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20) \ - struct make_kernel_name_hip(function_name, 21) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - std::decay_t _p19_; \ - std::decay_t _p20_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_); \ - } \ - } -#define make_kernel_functor_hip_22(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19) \ - struct make_kernel_name_hip(function_name, 20) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t 
_p18_; \ - std::decay_t _p19_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_); \ - } \ - } -#define make_kernel_functor_hip_21(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17, p18) \ - struct make_kernel_name_hip(function_name, 19) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - std::decay_t _p18_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_); \ - } \ - } -#define make_kernel_functor_hip_20(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16, p17) \ - struct make_kernel_name_hip(function_name, 18) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - std::decay_t _p17_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_, _p17_); \ - } \ - } -#define 
make_kernel_functor_hip_19(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15, p16) \ - struct make_kernel_name_hip(function_name, 17) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - std::decay_t _p16_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_, _p16_); \ - } \ - } -#define make_kernel_functor_hip_18(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14, p15) \ - struct make_kernel_name_hip(function_name, 16) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - std::decay_t _p15_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_, _p15_); \ - } \ - } -#define make_kernel_functor_hip_17(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13, p14) \ - struct make_kernel_name_hip(function_name, 15) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - 
std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - std::decay_t _p14_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_, _p14_); \ - } \ - } -#define make_kernel_functor_hip_16(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12, p13) \ - struct make_kernel_name_hip(function_name, 14) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - std::decay_t _p13_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_, _p13_); \ - } \ - } -#define make_kernel_functor_hip_15(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11, p12) \ - struct make_kernel_name_hip(function_name, 13) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - std::decay_t _p12_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ - _p12_); \ - } \ - } -#define make_kernel_functor_hip_14(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10, p11) \ - struct make_kernel_name_hip(function_name, 12) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t 
_p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - std::decay_t _p11_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_); \ - } \ - } -#define make_kernel_functor_hip_13(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9, p10) \ - struct make_kernel_name_hip(function_name, 11) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - std::decay_t _p10_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { \ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_); \ - } \ - } -#define make_kernel_functor_hip_12(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ - p9) \ - struct make_kernel_name_hip(function_name, 10) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - std::decay_t _p9_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); } \ - } -#define make_kernel_functor_hip_11(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \ - struct make_kernel_name_hip(function_name, 9) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - std::decay_t _p8_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); } \ - } -#define make_kernel_functor_hip_10(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ - struct 
make_kernel_name_hip(function_name, 8) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - std::decay_t _p7_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); } \ - } -#define make_kernel_functor_hip_9(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6) \ - struct make_kernel_name_hip(function_name, 7) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - std::decay_t _p6_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); } \ - } -#define make_kernel_functor_hip_8(function_name, kernel_name, p0, p1, p2, p3, p4, p5) \ - struct make_kernel_name_hip(function_name, 6) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - std::decay_t _p5_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); } \ - } -#define make_kernel_functor_hip_7(function_name, kernel_name, p0, p1, p2, p3, p4) \ - struct make_kernel_name_hip(function_name, 5) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - std::decay_t _p4_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); } \ - } -#define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3) \ - struct make_kernel_name_hip(function_name, 4) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - std::decay_t _p3_; \ - void operator()(const hc::tiled_index<3>&) const \ - [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_); } \ - } -#define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2) \ - struct 
make_kernel_name_hip(function_name, 3) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - std::decay_t _p2_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_, _p2_); } \ - } -#define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1) \ - struct make_kernel_name_hip(function_name, 2) { \ - std::decay_t _p0_; \ - std::decay_t _p1_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_); } \ - } -#define fofo(f, n) kernel_prefix_hip##f##kernel_suffix_hip##n -#define make_kernel_functor_hip_3(function_name, kernel_name, p0) \ - struct make_kernel_name_hip(function_name, 1) { \ - std::decay_t _p0_; \ - void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_); } \ - } -#define make_kernel_functor_hip_2(function_name, kernel_name) \ - struct make_kernel_name_hip(function_name, 0) { \ - void operator()(const hc::tiled_index<3>&)[[hc]] { return kernel_name(hipLaunchParm{}); } \ - } -#define make_kernel_functor_hip_1(...) -#define make_kernel_functor_hip_0(...) -#define make_kernel_functor_hip_(...) overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__) - - -#define hipLaunchNamedKernelGGL(function_name, kernel_name, num_blocks, dim_blocks, \ - group_mem_bytes, stream, ...) \ - do { \ - make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__) \ - hip_kernel_functor_impl_{__VA_ARGS__}; \ - hip_impl::grid_launch_hip_(num_blocks, dim_blocks, group_mem_bytes, stream, #kernel_name, \ - hip_kernel_functor_impl_); \ - } while (0) - -#define hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) \ - do { \ - hipLaunchNamedKernelGGL(unnamed, kernel_name, num_blocks, dim_blocks, group_mem_bytes, \ - stream, ##__VA_ARGS__); \ - } while (0) - -#define hipLaunchKernel(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) 
\ - do { \ - hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, \ - hipLaunchParm{}, ##__VA_ARGS__); \ - } while (0) -} // namespace hip_impl \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hcc_detail/math_functions.h b/src/utils/amd_hip/hip/hcc_detail/math_functions.h deleted file mode 100644 index 8ac87425b..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/math_functions.h +++ /dev/null @@ -1,1501 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "hip_fp16_math_fwd.h" -#include "hip_vector_types.h" -#include "math_fwd.h" - -#include - -#include -#include -#include -#include -#include - -// HCC's own math functions should be included first, otherwise there will -// be conflicts when hip/math_functions.h is included before hip/hip_runtime.h. 
-#ifdef __HCC__ -#include "kalmar_math.h" -#endif - -#pragma push_macro("__DEVICE__") -#pragma push_macro("__RETURN_TYPE") - -#ifdef __HCC__ -#define __DEVICE__ __device__ -#define __RETURN_TYPE int -#else // to be consistent with __clang_cuda_math_forward_declares -#define __DEVICE__ static __device__ -#define __RETURN_TYPE bool -#endif - -__DEVICE__ -inline -uint64_t __make_mantissa_base8(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0'; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa_base10(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0'; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa_base16(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0'; - else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10; - else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa(const char* tagp) -{ - if (!tagp) return 0u; - - if (*tagp == '0') { - ++tagp; - - if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp); - else return __make_mantissa_base8(tagp); - } - - return __make_mantissa_base10(tagp); -} - -// DOT FUNCTIONS -#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ -__DEVICE__ -inline -int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { - return __ockl_sdot2(a.data, b.data, c, saturate); -} -__DEVICE__ -inline -uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) { - return __ockl_udot2(a.data, b.data, c, saturate); -} -__DEVICE__ -inline -int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { - return __ockl_sdot4(a.data, b.data, c, saturate); -} -__DEVICE__ 
-inline -uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { - return __ockl_udot4(a.data, b.data, c, saturate); -} -__DEVICE__ -inline -int amd_mixed_dot(int a, int b, int c, bool saturate) { - return __ockl_sdot8(a, b, c, saturate); -} -__DEVICE__ -inline -uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { - return __ockl_udot8(a, b, c, saturate); -} -#endif - -// BEGIN FLOAT -__DEVICE__ -inline -float abs(float x) { return __ocml_fabs_f32(x); } -__DEVICE__ -inline -float acosf(float x) { return __ocml_acos_f32(x); } -__DEVICE__ -inline -float acoshf(float x) { return __ocml_acosh_f32(x); } -__DEVICE__ -inline -float asinf(float x) { return __ocml_asin_f32(x); } -__DEVICE__ -inline -float asinhf(float x) { return __ocml_asinh_f32(x); } -__DEVICE__ -inline -float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); } -__DEVICE__ -inline -float atanf(float x) { return __ocml_atan_f32(x); } -__DEVICE__ -inline -float atanhf(float x) { return __ocml_atanh_f32(x); } -__DEVICE__ -inline -float cbrtf(float x) { return __ocml_cbrt_f32(x); } -__DEVICE__ -inline -float ceilf(float x) { return __ocml_ceil_f32(x); } -__DEVICE__ -inline -float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); } -__DEVICE__ -inline -float cosf(float x) { return __ocml_cos_f32(x); } -__DEVICE__ -inline -float coshf(float x) { return __ocml_cosh_f32(x); } -__DEVICE__ -inline -float cospif(float x) { return __ocml_cospi_f32(x); } -__DEVICE__ -inline -float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); } -__DEVICE__ -inline -float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); } -__DEVICE__ -inline -float erfcf(float x) { return __ocml_erfc_f32(x); } -__DEVICE__ -inline -float erfcinvf(float x) { return __ocml_erfcinv_f32(x); } -__DEVICE__ -inline -float erfcxf(float x) { return __ocml_erfcx_f32(x); } -__DEVICE__ -inline -float erff(float x) { return __ocml_erf_f32(x); } -__DEVICE__ -inline -float erfinvf(float x) { return __ocml_erfinv_f32(x); } 
-__DEVICE__ -inline -float exp10f(float x) { return __ocml_exp10_f32(x); } -__DEVICE__ -inline -float exp2f(float x) { return __ocml_exp2_f32(x); } -__DEVICE__ -inline -float expf(float x) { return __ocml_exp_f32(x); } -__DEVICE__ -inline -float expm1f(float x) { return __ocml_expm1_f32(x); } -__DEVICE__ -inline -float fabsf(float x) { return __ocml_fabs_f32(x); } -__DEVICE__ -inline -float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); } -__DEVICE__ -inline -float fdividef(float x, float y) { return x / y; } -__DEVICE__ -inline -float floorf(float x) { return __ocml_floor_f32(x); } -__DEVICE__ -inline -float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); } -__DEVICE__ -inline -float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); } -__DEVICE__ -inline -float fminf(float x, float y) { return __ocml_fmin_f32(x, y); } -__DEVICE__ -inline -float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); } -__DEVICE__ -inline -float frexpf(float x, int* nptr) -{ - int tmp; - float r = - __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp); - *nptr = tmp; - - return r; -} -__DEVICE__ -inline -float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); } -__DEVICE__ -inline -int ilogbf(float x) { return __ocml_ilogb_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); } -__DEVICE__ -inline -float j0f(float x) { return __ocml_j0_f32(x); } -__DEVICE__ -inline -float j1f(float x) { return __ocml_j1_f32(x); } -__DEVICE__ -inline -float jnf(int n, float x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. 
- if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - float x0 = j0f(x); - float x1 = j1f(x); - for (int i = 1; i < n; ++i) { - float x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} -__DEVICE__ -inline -float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); } -__DEVICE__ -inline -float lgammaf(float x) { return __ocml_lgamma_f32(x); } -__DEVICE__ -inline -long long int llrintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -long long int llroundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float log10f(float x) { return __ocml_log10_f32(x); } -__DEVICE__ -inline -float log1pf(float x) { return __ocml_log1p_f32(x); } -__DEVICE__ -inline -float log2f(float x) { return __ocml_log2_f32(x); } -__DEVICE__ -inline -float logbf(float x) { return __ocml_logb_f32(x); } -__DEVICE__ -inline -float logf(float x) { return __ocml_log_f32(x); } -__DEVICE__ -inline -long int lrintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -long int lroundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float modff(float x, float* iptr) -{ - float tmp; - float r = - __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *iptr = tmp; - - return r; -} -__DEVICE__ -inline -float nanf(const char* tagp) -{ - union { - float val; - struct ieee_float { - uint32_t mantissa : 22; - uint32_t quiet : 1; - uint32_t exponent : 8; - uint32_t sign : 1; - } bits; - - static_assert(sizeof(float) == sizeof(ieee_float), ""); - } tmp; - - tmp.bits.sign = 0u; - tmp.bits.exponent = ~0u; - tmp.bits.quiet = 1u; - tmp.bits.mantissa = __make_mantissa(tagp); - - return tmp.val; -} -__DEVICE__ -inline -float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } -__DEVICE__ -inline -float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); } -__DEVICE__ -inline -float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); } -__DEVICE__ -inline -float norm4df(float x, float y, 
float z, float w) -{ - return __ocml_len4_f32(x, y, z, w); -} -__DEVICE__ -inline -float normcdff(float x) { return __ocml_ncdf_f32(x); } -__DEVICE__ -inline -float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); } -__DEVICE__ -inline -float normf(int dim, const float* a) -{ // TODO: placeholder until OCML adds support. - float r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_sqrt_f32(r); -} -__DEVICE__ -inline -float powf(float x, float y) { return __ocml_pow_f32(x, y); } -__DEVICE__ -inline -float rcbrtf(float x) { return __ocml_rcbrt_f32(x); } -__DEVICE__ -inline -float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); } -__DEVICE__ -inline -float remquof(float x, float y, int* quo) -{ - int tmp; - float r = - __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp); - *quo = tmp; - - return r; -} -__DEVICE__ -inline -float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); } -__DEVICE__ -inline -float rintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -float rnorm3df(float x, float y, float z) -{ - return __ocml_rlen3_f32(x, y, z); -} - -__DEVICE__ -inline -float rnorm4df(float x, float y, float z, float w) -{ - return __ocml_rlen4_f32(x, y, z, w); -} -__DEVICE__ -inline -float rnormf(int dim, const float* a) -{ // TODO: placeholder until OCML adds support. - float r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_rsqrt_f32(r); -} -__DEVICE__ -inline -float roundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float rsqrtf(float x) { return __ocml_rsqrt_f32(x); } -__DEVICE__ -inline -float scalblnf(float x, long int n) -{ - return (n < INT_MAX) ? 
__ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n); -} -__DEVICE__ -inline -float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); } -__DEVICE__ -inline -__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); } -__DEVICE__ -inline -void sincosf(float x, float* sptr, float* cptr) -{ - float tmp; - - *sptr = - __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -void sincospif(float x, float* sptr, float* cptr) -{ - float tmp; - - *sptr = - __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -float sinf(float x) { return __ocml_sin_f32(x); } -__DEVICE__ -inline -float sinhf(float x) { return __ocml_sinh_f32(x); } -__DEVICE__ -inline -float sinpif(float x) { return __ocml_sinpi_f32(x); } -__DEVICE__ -inline -float sqrtf(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float tanf(float x) { return __ocml_tan_f32(x); } -__DEVICE__ -inline -float tanhf(float x) { return __ocml_tanh_f32(x); } -__DEVICE__ -inline -float tgammaf(float x) { return __ocml_tgamma_f32(x); } -__DEVICE__ -inline -float truncf(float x) { return __ocml_trunc_f32(x); } -__DEVICE__ -inline -float y0f(float x) { return __ocml_y0_f32(x); } -__DEVICE__ -inline -float y1f(float x) { return __ocml_y1_f32(x); } -__DEVICE__ -inline -float ynf(int n, float x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. 
- if (n == 0) return y0f(x); - if (n == 1) return y1f(x); - - float x0 = y0f(x); - float x1 = y1f(x); - for (int i = 1; i < n; ++i) { - float x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} - -// BEGIN INTRINSICS -__DEVICE__ -inline -float __cosf(float x) { return __ocml_native_cos_f32(x); } -__DEVICE__ -inline -float __exp10f(float x) { return __ocml_native_exp10_f32(x); } -__DEVICE__ -inline -float __expf(float x) { return __ocml_native_exp_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fadd_rn(float x, float y) { return x + y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } -__DEVICE__ -inline -float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } -__DEVICE__ -inline -float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fdiv_rn(float x, float y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } -__DEVICE__ -inline -float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } -#endif -__DEVICE__ -inline -float __fdividef(float x, float y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmaf_rd(float x, float y, float z) -{ - return __ocml_fma_rtn_f32(x, y, z); -} -#endif -__DEVICE__ -inline -float __fmaf_rn(float x, float y, float z) -{ - return __ocml_fma_f32(x, y, z); -} -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmaf_ru(float x, float y, float z) -{ - return __ocml_fma_rtp_f32(x, y, z); -} -__DEVICE__ -inline -float __fmaf_rz(float x, float y, float z) -{ - return __ocml_fma_rtz_f32(x, y, z); -} -__DEVICE__ -inline -float __fmul_rd(float x, float y) { return 
__ocml_mul_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fmul_rn(float x, float y) { return x * y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } -__DEVICE__ -inline -float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } -__DEVICE__ -inline -float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } -#endif -__DEVICE__ -inline -float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); } -__DEVICE__ -inline -float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } -#endif -__DEVICE__ -inline -float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } -#endif -__DEVICE__ -inline -float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } -__DEVICE__ -inline -float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } -__DEVICE__ -inline -float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fsub_rn(float x, float y) { return x - y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } -__DEVICE__ -inline -float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } -#endif -__DEVICE__ -inline -float __log10f(float x) { return __ocml_native_log10_f32(x); } -__DEVICE__ -inline -float __log2f(float x) { return __ocml_native_log2_f32(x); } -__DEVICE__ -inline -float __logf(float x) { return __ocml_native_log_f32(x); } -__DEVICE__ -inline -float __powf(float x, float y) { return __ocml_pow_f32(x, y); } -__DEVICE__ -inline 
-float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); } -__DEVICE__ -inline -void __sincosf(float x, float* sptr, float* cptr) -{ - *sptr = __ocml_native_sin_f32(x); - *cptr = __ocml_native_cos_f32(x); -} -__DEVICE__ -inline -float __sinf(float x) { return __ocml_native_sin_f32(x); } -__DEVICE__ -inline -float __tanf(float x) { return __ocml_tan_f32(x); } -// END INTRINSICS -// END FLOAT - -// BEGIN DOUBLE -__DEVICE__ -inline -double abs(double x) { return __ocml_fabs_f64(x); } -__DEVICE__ -inline -double acos(double x) { return __ocml_acos_f64(x); } -__DEVICE__ -inline -double acosh(double x) { return __ocml_acosh_f64(x); } -__DEVICE__ -inline -double asin(double x) { return __ocml_asin_f64(x); } -__DEVICE__ -inline -double asinh(double x) { return __ocml_asinh_f64(x); } -__DEVICE__ -inline -double atan(double x) { return __ocml_atan_f64(x); } -__DEVICE__ -inline -double atan2(double x, double y) { return __ocml_atan2_f64(x, y); } -__DEVICE__ -inline -double atanh(double x) { return __ocml_atanh_f64(x); } -__DEVICE__ -inline -double cbrt(double x) { return __ocml_cbrt_f64(x); } -__DEVICE__ -inline -double ceil(double x) { return __ocml_ceil_f64(x); } -__DEVICE__ -inline -double copysign(double x, double y) { return __ocml_copysign_f64(x, y); } -__DEVICE__ -inline -double cos(double x) { return __ocml_cos_f64(x); } -__DEVICE__ -inline -double cosh(double x) { return __ocml_cosh_f64(x); } -__DEVICE__ -inline -double cospi(double x) { return __ocml_cospi_f64(x); } -__DEVICE__ -inline -double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); } -__DEVICE__ -inline -double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); } -__DEVICE__ -inline -double erf(double x) { return __ocml_erf_f64(x); } -__DEVICE__ -inline -double erfc(double x) { return __ocml_erfc_f64(x); } -__DEVICE__ -inline -double erfcinv(double x) { return __ocml_erfcinv_f64(x); } -__DEVICE__ -inline -double erfcx(double x) { return __ocml_erfcx_f64(x); } -__DEVICE__ -inline -double 
erfinv(double x) { return __ocml_erfinv_f64(x); } -__DEVICE__ -inline -double exp(double x) { return __ocml_exp_f64(x); } -__DEVICE__ -inline -double exp10(double x) { return __ocml_exp10_f64(x); } -__DEVICE__ -inline -double exp2(double x) { return __ocml_exp2_f64(x); } -__DEVICE__ -inline -double expm1(double x) { return __ocml_expm1_f64(x); } -__DEVICE__ -inline -double fabs(double x) { return __ocml_fabs_f64(x); } -__DEVICE__ -inline -double fdim(double x, double y) { return __ocml_fdim_f64(x, y); } -__DEVICE__ -inline -double floor(double x) { return __ocml_floor_f64(x); } -__DEVICE__ -inline -double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); } -__DEVICE__ -inline -double fmax(double x, double y) { return __ocml_fmax_f64(x, y); } -__DEVICE__ -inline -double fmin(double x, double y) { return __ocml_fmin_f64(x, y); } -__DEVICE__ -inline -double fmod(double x, double y) { return __ocml_fmod_f64(x, y); } -__DEVICE__ -inline -double frexp(double x, int* nptr) -{ - int tmp; - double r = - __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp); - *nptr = tmp; - - return r; -} -__DEVICE__ -inline -double hypot(double x, double y) { return __ocml_hypot_f64(x, y); } -__DEVICE__ -inline -int ilogb(double x) { return __ocml_ilogb_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); } -__DEVICE__ -inline -double j0(double x) { return __ocml_j0_f64(x); } -__DEVICE__ -inline -double j1(double x) { return __ocml_j1_f64(x); } -__DEVICE__ -inline -double jn(int n, double x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. 
- if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - double x0 = j0f(x); - double x1 = j1f(x); - for (int i = 1; i < n; ++i) { - double x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} -__DEVICE__ -inline -double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); } -__DEVICE__ -inline -double lgamma(double x) { return __ocml_lgamma_f64(x); } -__DEVICE__ -inline -long long int llrint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -long long int llround(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double log(double x) { return __ocml_log_f64(x); } -__DEVICE__ -inline -double log10(double x) { return __ocml_log10_f64(x); } -__DEVICE__ -inline -double log1p(double x) { return __ocml_log1p_f64(x); } -__DEVICE__ -inline -double log2(double x) { return __ocml_log2_f64(x); } -__DEVICE__ -inline -double logb(double x) { return __ocml_logb_f64(x); } -__DEVICE__ -inline -long int lrint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -long int lround(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double modf(double x, double* iptr) -{ - double tmp; - double r = - __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp); - *iptr = tmp; - - return r; -} -__DEVICE__ -inline -double nan(const char* tagp) -{ -#if !_WIN32 - union { - double val; - struct ieee_double { - uint64_t mantissa : 51; - uint32_t quiet : 1; - uint32_t exponent : 11; - uint32_t sign : 1; - } bits; - static_assert(sizeof(double) == sizeof(ieee_double), ""); - } tmp; - - tmp.bits.sign = 0u; - tmp.bits.exponent = ~0u; - tmp.bits.quiet = 1u; - tmp.bits.mantissa = __make_mantissa(tagp); - - return tmp.val; -#else - uint64_t val = __make_mantissa(tagp); - val |= 0xFFF << 51; - return reinterpret_cast(val); -#endif -} -__DEVICE__ -inline -double nearbyint(double x) { return __ocml_nearbyint_f64(x); } -__DEVICE__ -inline -double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); } -__DEVICE__ 
-inline -double norm(int dim, const double* a) -{ // TODO: placeholder until OCML adds support. - double r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_sqrt_f64(r); -} -__DEVICE__ -inline -double norm3d(double x, double y, double z) -{ - return __ocml_len3_f64(x, y, z); -} -__DEVICE__ -inline -double norm4d(double x, double y, double z, double w) -{ - return __ocml_len4_f64(x, y, z, w); -} -__DEVICE__ -inline -double normcdf(double x) { return __ocml_ncdf_f64(x); } -__DEVICE__ -inline -double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); } -__DEVICE__ -inline -double pow(double x, double y) { return __ocml_pow_f64(x, y); } -__DEVICE__ -inline -double rcbrt(double x) { return __ocml_rcbrt_f64(x); } -__DEVICE__ -inline -double remainder(double x, double y) { return __ocml_remainder_f64(x, y); } -__DEVICE__ -inline -double remquo(double x, double y, int* quo) -{ - int tmp; - double r = - __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp); - *quo = tmp; - - return r; -} -__DEVICE__ -inline -double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); } -__DEVICE__ -inline -double rint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -double rnorm(int dim, const double* a) -{ // TODO: placeholder until OCML adds support. - double r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_rsqrt_f64(r); -} -__DEVICE__ -inline -double rnorm3d(double x, double y, double z) -{ - return __ocml_rlen3_f64(x, y, z); -} -__DEVICE__ -inline -double rnorm4d(double x, double y, double z, double w) -{ - return __ocml_rlen4_f64(x, y, z, w); -} -__DEVICE__ -inline -double round(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double rsqrt(double x) { return __ocml_rsqrt_f64(x); } -__DEVICE__ -inline -double scalbln(double x, long int n) -{ - return (n < INT_MAX) ? 
__ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n); -} -__DEVICE__ -inline -double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); } -__DEVICE__ -inline -__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); } -__DEVICE__ -inline -double sin(double x) { return __ocml_sin_f64(x); } -__DEVICE__ -inline -void sincos(double x, double* sptr, double* cptr) -{ - double tmp; - *sptr = - __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -void sincospi(double x, double* sptr, double* cptr) -{ - double tmp; - *sptr = __ocml_sincospi_f64( - x, (__attribute__((address_space(5))) double*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -double sinh(double x) { return __ocml_sinh_f64(x); } -__DEVICE__ -inline -double sinpi(double x) { return __ocml_sinpi_f64(x); } -__DEVICE__ -inline -double sqrt(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double tan(double x) { return __ocml_tan_f64(x); } -__DEVICE__ -inline -double tanh(double x) { return __ocml_tanh_f64(x); } -__DEVICE__ -inline -double tgamma(double x) { return __ocml_tgamma_f64(x); } -__DEVICE__ -inline -double trunc(double x) { return __ocml_trunc_f64(x); } -__DEVICE__ -inline -double y0(double x) { return __ocml_y0_f64(x); } -__DEVICE__ -inline -double y1(double x) { return __ocml_y1_f64(x); } -__DEVICE__ -inline -double yn(int n, double x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. 
- if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - double x0 = j0f(x); - double x1 = j1f(x); - for (int i = 1; i < n; ++i) { - double x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} - -// BEGIN INTRINSICS -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dadd_rn(double x, double y) { return x + y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } -__DEVICE__ -inline -double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } -__DEVICE__ -inline -double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __ddiv_rn(double x, double y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } -__DEVICE__ -inline -double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } -__DEVICE__ -inline -double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dmul_rn(double x, double y) { return x * y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } -__DEVICE__ -inline -double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } -__DEVICE__ -inline -double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } -#endif -__DEVICE__ -inline -double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } -__DEVICE__ -inline -double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } -__DEVICE__ -inline -double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } 
-#endif -__DEVICE__ -inline -double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } -__DEVICE__ -inline -double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } -__DEVICE__ -inline -double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dsub_rn(double x, double y) { return x - y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } -__DEVICE__ -inline -double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } -__DEVICE__ -inline -double __fma_rd(double x, double y, double z) -{ - return __ocml_fma_rtn_f64(x, y, z); -} -#endif -__DEVICE__ -inline -double __fma_rn(double x, double y, double z) -{ - return __ocml_fma_f64(x, y, z); -} -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __fma_ru(double x, double y, double z) -{ - return __ocml_fma_rtp_f64(x, y, z); -} -__DEVICE__ -inline -double __fma_rz(double x, double y, double z) -{ - return __ocml_fma_rtz_f64(x, y, z); -} -#endif -// END INTRINSICS -// END DOUBLE - -// BEGIN INTEGER -__DEVICE__ -inline -int abs(int x) -{ - int sgn = x >> (sizeof(int) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} -__DEVICE__ -inline -long labs(long x) -{ - long sgn = x >> (sizeof(long) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} -__DEVICE__ -inline -long long llabs(long long x) -{ - long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} - -#if defined(__cplusplus) - __DEVICE__ - inline - long abs(long x) { return labs(x); } - __DEVICE__ - inline - long long abs(long long x) { return llabs(x); } -#endif -// END INTEGER - -__DEVICE__ -inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) { - return __ocml_fma_f16(x, y, z); -} - -__DEVICE__ -inline float fma(float x, float y, float z) { - 
return fmaf(x, y, z); -} - -#pragma push_macro("__DEF_FLOAT_FUN") -#pragma push_macro("__DEF_FLOAT_FUN2") -#pragma push_macro("__DEF_FLOAT_FUN2I") -#pragma push_macro("__HIP_OVERLOAD") -#pragma push_macro("__HIP_OVERLOAD2") - -// __hip_enable_if::type is a type function which returns __T if __B is true. -template -struct __hip_enable_if {}; - -template struct __hip_enable_if { - typedef __T type; -}; - -// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to -// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with -// floor(double). -#define __HIP_OVERLOAD1(__retty, __fn) \ - template \ - __DEVICE__ \ - typename __hip_enable_if::is_integer, \ - __retty>::type \ - __fn(__T __x) { \ - return ::__fn((double)__x); \ - } - -// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double -// or integer argument to avoid compilation error due to ambibuity. e.g. -// max(5.0f, 6.0) is resolved with max(double, double). -#define __HIP_OVERLOAD2(__retty, __fn) \ - template \ - __DEVICE__ typename __hip_enable_if< \ - std::numeric_limits<__T1>::is_specialized && \ - std::numeric_limits<__T2>::is_specialized, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ - return __fn((double)__x, (double)__y); \ - } - -// Define cmath functions with float argument and returns float. -#define __DEF_FUN1(retty, func) \ -__DEVICE__ \ -inline \ -float func(float x) \ -{ \ - return func##f(x); \ -} \ -__HIP_OVERLOAD1(retty, func) - -// Define cmath functions with float argument and returns retty. -#define __DEF_FUNI(retty, func) \ -__DEVICE__ \ -inline \ -retty func(float x) \ -{ \ - return func##f(x); \ -} \ -__HIP_OVERLOAD1(retty, func) - -// define cmath functions with two float arguments. 
-#define __DEF_FUN2(retty, func) \ -__DEVICE__ \ -inline \ -float func(float x, float y) \ -{ \ - return func##f(x, y); \ -} \ -__HIP_OVERLOAD2(retty, func) - -__DEF_FUN1(double, acos) -__DEF_FUN1(double, acosh) -__DEF_FUN1(double, asin) -__DEF_FUN1(double, asinh) -__DEF_FUN1(double, atan) -__DEF_FUN2(double, atan2); -__DEF_FUN1(double, atanh) -__DEF_FUN1(double, cbrt) -__DEF_FUN1(double, ceil) -__DEF_FUN2(double, copysign); -__DEF_FUN1(double, cos) -__DEF_FUN1(double, cosh) -__DEF_FUN1(double, erf) -__DEF_FUN1(double, erfc) -__DEF_FUN1(double, exp) -__DEF_FUN1(double, exp2) -__DEF_FUN1(double, expm1) -__DEF_FUN1(double, fabs) -__DEF_FUN2(double, fdim); -__DEF_FUN1(double, floor) -__DEF_FUN2(double, fmax); -__DEF_FUN2(double, fmin); -__DEF_FUN2(double, fmod); -//__HIP_OVERLOAD1(int, fpclassify) -__DEF_FUN2(double, hypot); -__DEF_FUNI(int, ilogb) -__HIP_OVERLOAD1(bool, isfinite) -__HIP_OVERLOAD2(bool, isgreater); -__HIP_OVERLOAD2(bool, isgreaterequal); -__HIP_OVERLOAD1(bool, isinf); -__HIP_OVERLOAD2(bool, isless); -__HIP_OVERLOAD2(bool, islessequal); -__HIP_OVERLOAD2(bool, islessgreater); -__HIP_OVERLOAD1(bool, isnan); -//__HIP_OVERLOAD1(bool, isnormal) -__HIP_OVERLOAD2(bool, isunordered); -__DEF_FUN1(double, lgamma) -__DEF_FUN1(double, log) -__DEF_FUN1(double, log10) -__DEF_FUN1(double, log1p) -__DEF_FUN1(double, log2) -__DEF_FUN1(double, logb) -__DEF_FUNI(long long, llrint) -__DEF_FUNI(long long, llround) -__DEF_FUNI(long, lrint) -__DEF_FUNI(long, lround) -__DEF_FUN1(double, nearbyint); -__DEF_FUN2(double, nextafter); -__DEF_FUN2(double, pow); -__DEF_FUN2(double, remainder); -__DEF_FUN1(double, rint); -__DEF_FUN1(double, round); -__HIP_OVERLOAD1(bool, signbit) -__DEF_FUN1(double, sin) -__DEF_FUN1(double, sinh) -__DEF_FUN1(double, sqrt) -__DEF_FUN1(double, tan) -__DEF_FUN1(double, tanh) -__DEF_FUN1(double, tgamma) -__DEF_FUN1(double, trunc); - -// define cmath functions with a float and an integer argument. 
-#define __DEF_FLOAT_FUN2I(func) \ -__DEVICE__ \ -inline \ -float func(float x, int y) \ -{ \ - return func##f(x, y); \ -} -__DEF_FLOAT_FUN2I(scalbn) - -#if __HCC__ -template -__DEVICE__ inline static T min(T arg1, T arg2) { - return (arg1 < arg2) ? arg1 : arg2; -} - -__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) { - return min(arg1, (uint32_t) arg2); -} -/*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) { - return min((uint32_t) arg1, arg2); -} - -__DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) { - return min(arg1, (uint64_t) arg2); -} -__DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) { - return min((uint64_t) arg1, arg2); -} - -__DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) { - return min(arg1, (unsigned long long) arg2); -} -__DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) { - return min((unsigned long long) arg1, arg2); -}*/ - -template -__DEVICE__ inline static T max(T arg1, T arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - -__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) { - return max(arg1, (uint32_t) arg2); -} -__DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) { - return max((uint32_t) arg1, arg2); -} - -/*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) { - return max(arg1, (uint64_t) arg2); -} -__DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) { - return max((uint64_t) arg1, arg2); -} - -__DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) { - return max(arg1, (unsigned long long) arg2); -} -__DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) { - return max((unsigned long long) arg1, arg2); -}*/ -#else -__DEVICE__ inline int min(int arg1, int arg2) { - return (arg1 < arg2) ? 
arg1 : arg2; -} -__DEVICE__ inline int max(int arg1, int arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - -__DEVICE__ -inline -float max(float x, float y) { - return fmaxf(x, y); -} - -__DEVICE__ -inline -double max(double x, double y) { - return fmax(x, y); -} - -__DEVICE__ -inline -float min(float x, float y) { - return fminf(x, y); -} - -__DEVICE__ -inline -double min(double x, double y) { - return fmin(x, y); -} - -__HIP_OVERLOAD2(double, max) -__HIP_OVERLOAD2(double, min) - -#endif - -__host__ inline static int min(int arg1, int arg2) { - return std::min(arg1, arg2); -} - -__host__ inline static int max(int arg1, int arg2) { - return std::max(arg1, arg2); -} - - -#pragma pop_macro("__DEF_FLOAT_FUN") -#pragma pop_macro("__DEF_FLOAT_FUN2") -#pragma pop_macro("__DEF_FLOAT_FUN2I") -#pragma pop_macro("__HIP_OVERLOAD") -#pragma pop_macro("__HIP_OVERLOAD2") -#pragma pop_macro("__DEVICE__") -#pragma pop_macro("__RETURN_TYPE") - -// For backward compatibility. -// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros -// defined after including math_functions.h. -#include diff --git a/src/utils/amd_hip/hip/hcc_detail/math_fwd.h b/src/utils/amd_hip/hip/hcc_detail/math_fwd.h deleted file mode 100644 index c25b5e90b..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/math_fwd.h +++ /dev/null @@ -1,706 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "host_defines.h" - -#if defined(__cplusplus) - extern "C" { -#endif - -// DOT FUNCTIONS -#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ -__device__ -__attribute__((const)) -int __ockl_sdot2( - HIP_vector_base::Native_vec_, - HIP_vector_base::Native_vec_, - int, bool); - -__device__ -__attribute__((const)) -unsigned int __ockl_udot2( - HIP_vector_base::Native_vec_, - HIP_vector_base::Native_vec_, - unsigned int, bool); - -__device__ -__attribute__((const)) -int __ockl_sdot4( - HIP_vector_base::Native_vec_, - HIP_vector_base::Native_vec_, - int, bool); - -__device__ -__attribute__((const)) -unsigned int __ockl_udot4( - HIP_vector_base::Native_vec_, - HIP_vector_base::Native_vec_, - unsigned int, bool); - -__device__ -__attribute__((const)) -int __ockl_sdot8(int, int, int, bool); - -__device__ -__attribute__((const)) -unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool); -#endif - -// BEGIN FLOAT -__device__ -__attribute__((const)) -float __ocml_acos_f32(float); -__device__ -__attribute__((pure)) -float __ocml_acosh_f32(float); -__device__ -__attribute__((const)) -float __ocml_asin_f32(float); -__device__ -__attribute__((pure)) -float __ocml_asinh_f32(float); -__device__ -__attribute__((const)) -float __ocml_atan2_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_atan_f32(float); -__device__ -__attribute__((pure)) -float __ocml_atanh_f32(float); -__device__ -__attribute__((pure)) -float __ocml_cbrt_f32(float); -__device__ 
-__attribute__((const)) -float __ocml_ceil_f32(float); -__device__ -__attribute__((const)) -__device__ -float __ocml_copysign_f32(float, float); -__device__ -float __ocml_cos_f32(float); -__device__ -float __ocml_native_cos_f32(float); -__device__ -__attribute__((pure)) -__device__ -float __ocml_cosh_f32(float); -__device__ -float __ocml_cospi_f32(float); -__device__ -float __ocml_i0_f32(float); -__device__ -float __ocml_i1_f32(float); -__device__ -__attribute__((pure)) -float __ocml_erfc_f32(float); -__device__ -__attribute__((pure)) -float __ocml_erfcinv_f32(float); -__device__ -__attribute__((pure)) -float __ocml_erfcx_f32(float); -__device__ -__attribute__((pure)) -float __ocml_erf_f32(float); -__device__ -__attribute__((pure)) -float __ocml_erfinv_f32(float); -__device__ -__attribute__((pure)) -float __ocml_exp10_f32(float); -__device__ -__attribute__((pure)) -float __ocml_native_exp10_f32(float); -__device__ -__attribute__((pure)) -float __ocml_exp2_f32(float); -__device__ -__attribute__((pure)) -float __ocml_exp_f32(float); -__device__ -__attribute__((pure)) -float __ocml_native_exp_f32(float); -__device__ -__attribute__((pure)) -float __ocml_expm1_f32(float); -__device__ -__attribute__((const)) -float __ocml_fabs_f32(float); -__device__ -__attribute__((const)) -float __ocml_fdim_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_floor_f32(float); -__device__ -__attribute__((const)) -float __ocml_fma_f32(float, float, float); -__device__ -__attribute__((const)) -float __ocml_fmax_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_fmin_f32(float, float); -__device__ -__attribute__((const)) -__device__ -float __ocml_fmod_f32(float, float); -__device__ -float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*); -__device__ -__attribute__((const)) -float __ocml_hypot_f32(float, float); -__device__ -__attribute__((const)) -int __ocml_ilogb_f32(float); -__device__ -__attribute__((const)) -int 
__ocml_isfinite_f32(float); -__device__ -__attribute__((const)) -int __ocml_isinf_f32(float); -__device__ -__attribute__((const)) -int __ocml_isnan_f32(float); -__device__ -float __ocml_j0_f32(float); -__device__ -float __ocml_j1_f32(float); -__device__ -__attribute__((const)) -float __ocml_ldexp_f32(float, int); -__device__ -float __ocml_lgamma_f32(float); -__device__ -__attribute__((pure)) -float __ocml_log10_f32(float); -__device__ -__attribute__((pure)) -float __ocml_native_log10_f32(float); -__device__ -__attribute__((pure)) -float __ocml_log1p_f32(float); -__device__ -__attribute__((pure)) -float __ocml_log2_f32(float); -__device__ -__attribute__((pure)) -float __ocml_native_log2_f32(float); -__device__ -__attribute__((const)) -float __ocml_logb_f32(float); -__device__ -__attribute__((pure)) -float __ocml_log_f32(float); -__device__ -__attribute__((pure)) -float __ocml_native_log_f32(float); -__device__ -float __ocml_modf_f32(float, __attribute__((address_space(5))) float*); -__device__ -__attribute__((const)) -float __ocml_nearbyint_f32(float); -__device__ -__attribute__((const)) -float __ocml_nextafter_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_len3_f32(float, float, float); -__device__ -__attribute__((const)) -float __ocml_len4_f32(float, float, float, float); -__device__ -__attribute__((pure)) -float __ocml_ncdf_f32(float); -__device__ -__attribute__((pure)) -float __ocml_ncdfinv_f32(float); -__device__ -__attribute__((pure)) -float __ocml_pow_f32(float, float); -__device__ -__attribute__((pure)) -float __ocml_rcbrt_f32(float); -__device__ -__attribute__((const)) -float __ocml_remainder_f32(float, float); -__device__ -float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*); -__device__ -__attribute__((const)) -float __ocml_rhypot_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_rint_f32(float); -__device__ -__attribute__((const)) -float __ocml_rlen3_f32(float, float, float); -__device__ 
-__attribute__((const)) -float __ocml_rlen4_f32(float, float, float, float); -__device__ -__attribute__((const)) -float __ocml_round_f32(float); -__device__ -__attribute__((pure)) -float __ocml_rsqrt_f32(float); -__device__ -__attribute__((const)) -float __ocml_scalb_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_scalbn_f32(float, int); -__device__ -__attribute__((const)) -int __ocml_signbit_f32(float); -__device__ -float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*); -__device__ -float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*); -__device__ -float __ocml_sin_f32(float); -__device__ -float __ocml_native_sin_f32(float); -__device__ -__attribute__((pure)) -float __ocml_sinh_f32(float); -__device__ -float __ocml_sinpi_f32(float); -__device__ -__attribute__((const)) -float __ocml_sqrt_f32(float); -__device__ -__attribute__((const)) -float __ocml_native_sqrt_f32(float); -__device__ -float __ocml_tan_f32(float); -__device__ -__attribute__((pure)) -float __ocml_tanh_f32(float); -__device__ -float __ocml_tgamma_f32(float); -__device__ -__attribute__((const)) -float __ocml_trunc_f32(float); -__device__ -float __ocml_y0_f32(float); -__device__ -float __ocml_y1_f32(float); - -// BEGIN INTRINSICS -__device__ -__attribute__((const)) -float __ocml_add_rte_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_add_rtn_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_add_rtp_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_add_rtz_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sub_rte_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sub_rtn_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sub_rtp_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sub_rtz_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_mul_rte_f32(float, float); -__device__ 
-__attribute__((const)) -float __ocml_mul_rtn_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_mul_rtp_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_mul_rtz_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_div_rte_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_div_rtn_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_div_rtp_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_div_rtz_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sqrt_rte_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sqrt_rtn_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sqrt_rtp_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_sqrt_rtz_f32(float, float); -__device__ -__attribute__((const)) -float __ocml_fma_rte_f32(float, float, float); -__device__ -__attribute__((const)) -float __ocml_fma_rtn_f32(float, float, float); -__device__ -__attribute__((const)) -float __ocml_fma_rtp_f32(float, float, float); -__device__ -__attribute__((const)) -float __ocml_fma_rtz_f32(float, float, float); - -__device__ -__attribute__((const)) -float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32"); -__device__ -__attribute__((const)) -float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32"); -__device__ -__attribute__((const)) -float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32"); -__device__ -__attribute__((const)) -float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32"); -// END INTRINSICS -// END FLOAT - -// BEGIN DOUBLE -__device__ -__attribute__((const)) -double __ocml_acos_f64(double); -__device__ -__attribute__((pure)) -double __ocml_acosh_f64(double); -__device__ -__attribute__((const)) -double __ocml_asin_f64(double); -__device__ -__attribute__((pure)) -double __ocml_asinh_f64(double); -__device__ -__attribute__((const)) -double __ocml_atan2_f64(double, double); 
-__device__ -__attribute__((const)) -double __ocml_atan_f64(double); -__device__ -__attribute__((pure)) -double __ocml_atanh_f64(double); -__device__ -__attribute__((pure)) -double __ocml_cbrt_f64(double); -__device__ -__attribute__((const)) -double __ocml_ceil_f64(double); -__device__ -__attribute__((const)) -double __ocml_copysign_f64(double, double); -__device__ -double __ocml_cos_f64(double); -__device__ -__attribute__((pure)) -double __ocml_cosh_f64(double); -__device__ -double __ocml_cospi_f64(double); -__device__ -double __ocml_i0_f64(double); -__device__ -double __ocml_i1_f64(double); -__device__ -__attribute__((pure)) -double __ocml_erfc_f64(double); -__device__ -__attribute__((pure)) -double __ocml_erfcinv_f64(double); -__device__ -__attribute__((pure)) -double __ocml_erfcx_f64(double); -__device__ -__attribute__((pure)) -double __ocml_erf_f64(double); -__device__ -__attribute__((pure)) -double __ocml_erfinv_f64(double); -__device__ -__attribute__((pure)) -double __ocml_exp10_f64(double); -__device__ -__attribute__((pure)) -double __ocml_exp2_f64(double); -__device__ -__attribute__((pure)) -double __ocml_exp_f64(double); -__device__ -__attribute__((pure)) -double __ocml_expm1_f64(double); -__device__ -__attribute__((const)) -double __ocml_fabs_f64(double); -__device__ -__attribute__((const)) -double __ocml_fdim_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_floor_f64(double); -__device__ -__attribute__((const)) -double __ocml_fma_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_fmax_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_fmin_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_fmod_f64(double, double); -__device__ -double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*); -__device__ -__attribute__((const)) -double __ocml_hypot_f64(double, double); -__device__ -__attribute__((const)) -int __ocml_ilogb_f64(double); -__device__ 
-__attribute__((const)) -int __ocml_isfinite_f64(double); -__device__ -__attribute__((const)) -int __ocml_isinf_f64(double); -__device__ -__attribute__((const)) -int __ocml_isnan_f64(double); -__device__ -double __ocml_j0_f64(double); -__device__ -double __ocml_j1_f64(double); -__device__ -__attribute__((const)) -double __ocml_ldexp_f64(double, int); -__device__ -double __ocml_lgamma_f64(double); -__device__ -__attribute__((pure)) -double __ocml_log10_f64(double); -__device__ -__attribute__((pure)) -double __ocml_log1p_f64(double); -__device__ -__attribute__((pure)) -double __ocml_log2_f64(double); -__device__ -__attribute__((const)) -double __ocml_logb_f64(double); -__device__ -__attribute__((pure)) -double __ocml_log_f64(double); -__device__ -double __ocml_modf_f64(double, __attribute__((address_space(5))) double*); -__device__ -__attribute__((const)) -double __ocml_nearbyint_f64(double); -__device__ -__attribute__((const)) -double __ocml_nextafter_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_len3_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_len4_f64(double, double, double, double); -__device__ -__attribute__((pure)) -double __ocml_ncdf_f64(double); -__device__ -__attribute__((pure)) -double __ocml_ncdfinv_f64(double); -__device__ -__attribute__((pure)) -double __ocml_pow_f64(double, double); -__device__ -__attribute__((pure)) -double __ocml_rcbrt_f64(double); -__device__ -__attribute__((const)) -double __ocml_remainder_f64(double, double); -__device__ -double __ocml_remquo_f64( - double, double, __attribute__((address_space(5))) int*); -__device__ -__attribute__((const)) -double __ocml_rhypot_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_rint_f64(double); -__device__ -__attribute__((const)) -double __ocml_rlen3_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_rlen4_f64(double, double, double, double); -__device__ -__attribute__((const)) -double 
__ocml_round_f64(double); -__device__ -__attribute__((pure)) -double __ocml_rsqrt_f64(double); -__device__ -__attribute__((const)) -double __ocml_scalb_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_scalbn_f64(double, int); -__device__ -__attribute__((const)) -int __ocml_signbit_f64(double); -__device__ -double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*); -__device__ -double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*); -__device__ -double __ocml_sin_f64(double); -__device__ -__attribute__((pure)) -double __ocml_sinh_f64(double); -__device__ -double __ocml_sinpi_f64(double); -__device__ -__attribute__((const)) -double __ocml_sqrt_f64(double); -__device__ -double __ocml_tan_f64(double); -__device__ -__attribute__((pure)) -double __ocml_tanh_f64(double); -__device__ -double __ocml_tgamma_f64(double); -__device__ -__attribute__((const)) -double __ocml_trunc_f64(double); -__device__ -double __ocml_y0_f64(double); -__device__ -double __ocml_y1_f64(double); - -// BEGIN INTRINSICS -__device__ -__attribute__((const)) -double __ocml_add_rte_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_add_rtn_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_add_rtp_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_add_rtz_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sub_rte_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sub_rtn_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sub_rtp_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sub_rtz_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_mul_rte_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_mul_rtn_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_mul_rtp_f64(double, double); -__device__ -__attribute__((const)) -double 
__ocml_mul_rtz_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_div_rte_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_div_rtn_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_div_rtp_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_div_rtz_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sqrt_rte_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sqrt_rtn_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sqrt_rtp_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_sqrt_rtz_f64(double, double); -__device__ -__attribute__((const)) -double __ocml_fma_rte_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_fma_rtn_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_fma_rtp_f64(double, double, double); -__device__ -__attribute__((const)) -double __ocml_fma_rtz_f64(double, double, double); - -__device__ -__attribute__((const)) -double __llvm_amdgcn_rcp_f64(double) __asm("llvm.amdgcn.rcp.f64"); -__device__ -__attribute__((const)) -double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64"); -// END INTRINSICS -// END DOUBLE - -#if defined(__cplusplus) - } // extern "C" -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/program_state.hpp b/src/utils/amd_hip/hip/hcc_detail/program_state.hpp deleted file mode 100644 index da13c7c3d..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/program_state.hpp +++ /dev/null @@ -1,108 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -struct ihipModuleSymbol_t; -using hipFunction_t = ihipModuleSymbol_t*; - -namespace std { -template <> -struct hash { - size_t operator()(hsa_agent_t x) const { return hash{}(x.handle); } -}; -} // namespace std - -inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; } - -namespace hip_impl { -class Kernel_descriptor { - std::uint64_t kernel_object_{}; - amd_kernel_code_t const* kernel_header_{nullptr}; - std::string name_{}; -public: - Kernel_descriptor() = default; - Kernel_descriptor(std::uint64_t kernel_object, const std::string& name) - : kernel_object_{kernel_object}, name_{name} - { - bool supported{false}; - std::uint16_t min_v{UINT16_MAX}; - auto r = hsa_system_major_extension_supported( - HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported); - - if (r != HSA_STATUS_SUCCESS || !supported) return; - - hsa_ven_amd_loader_1_01_pfn_t tbl{}; - - r = hsa_system_get_major_extension_table( - HSA_EXTENSION_AMD_LOADER, - 1, - sizeof(tbl), - reinterpret_cast(&tbl)); - - if (r != HSA_STATUS_SUCCESS) return; - if (!tbl.hsa_ven_amd_loader_query_host_address) return; - - r = tbl.hsa_ven_amd_loader_query_host_address( - reinterpret_cast(kernel_object_), - reinterpret_cast(&kernel_header_)); - - if (r != HSA_STATUS_SUCCESS) return; - } - Kernel_descriptor(const Kernel_descriptor&) = default; - Kernel_descriptor(Kernel_descriptor&&) = default; - ~Kernel_descriptor() = default; - - Kernel_descriptor& operator=(const Kernel_descriptor&) = default; - Kernel_descriptor& operator=(Kernel_descriptor&&) = default; - - operator hipFunction_t() const { // TODO: this is awful and only meant for illustration. 
- return reinterpret_cast(const_cast(this)); - } -}; - -const std::unordered_map>& executables( - bool rebuild = false); -const std::unordered_map>>& -functions(bool rebuild = false); -const std::unordered_map& function_names(bool rebuild = false); -std::unordered_map& globals(bool rebuild = false); -const std::unordered_map< - std::string, std::vector>>& - kernargs(bool rebuild = false); - -hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, - hsa_agent_t agent); -} // Namespace hip_impl. diff --git a/src/utils/amd_hip/hip/hcc_detail/surface_functions.h b/src/utils/amd_hip/hip/hcc_detail/surface_functions.h deleted file mode 100644 index b9cab1f46..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/surface_functions.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H - -#include - -#define __SURFACE_FUNCTIONS_DECL__ static inline __device__ -template -__SURFACE_FUNCTIONS_DECL__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, - int boundaryMode = hipBoundaryModeZero) { - hipArray* arrayPtr = (hipArray*)surfObj; - size_t width = arrayPtr->width; - size_t height = arrayPtr->height; - int32_t xOffset = x / sizeof(T); - T* dataPtr = (T*)arrayPtr->data; - if ((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0)) { - if (boundaryMode == hipBoundaryModeZero) { - *data = 0; - } - } else { - *data = *(dataPtr + y * width + xOffset); - } -} - -template -__SURFACE_FUNCTIONS_DECL__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, - int boundaryMode = hipBoundaryModeZero) { - hipArray* arrayPtr = (hipArray*)surfObj; - size_t width = arrayPtr->width; - size_t height = arrayPtr->height; - int32_t xOffset = x / sizeof(T); - T* dataPtr = (T*)arrayPtr->data; - if (!((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0))) { - *(dataPtr + y * width + xOffset) = data; - } -} - -#endif diff --git a/src/utils/amd_hip/hip/hcc_detail/texture_functions.h b/src/utils/amd_hip/hip/hcc_detail/texture_functions.h deleted file mode 100644 index 1f22e0117..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/texture_functions.h +++ /dev/null @@ -1,11102 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H - -#include -#include - -#pragma push_macro("TYPEDEF_VECTOR_VALUE_TYPE") -#define TYPEDEF_VECTOR_VALUE_TYPE(SCALAR_TYPE) \ -typedef SCALAR_TYPE __hip_##SCALAR_TYPE##2_vector_value_type __attribute__((ext_vector_type(2))); \ -typedef SCALAR_TYPE __hip_##SCALAR_TYPE##3_vector_value_type __attribute__((ext_vector_type(3))); \ -typedef SCALAR_TYPE __hip_##SCALAR_TYPE##4_vector_value_type __attribute__((ext_vector_type(4))); \ -typedef SCALAR_TYPE __hip_##SCALAR_TYPE##8_vector_value_type __attribute__((ext_vector_type(8))); \ -typedef SCALAR_TYPE __hip_##SCALAR_TYPE##16_vector_value_type __attribute__((ext_vector_type(16))); - -TYPEDEF_VECTOR_VALUE_TYPE(float); -TYPEDEF_VECTOR_VALUE_TYPE(int); -TYPEDEF_VECTOR_VALUE_TYPE(uint); - -#undef TYPEDEF_VECTOR_VALUE_TYPE -#pragma pop_macro("TYPEDEF_VECTOR_VALUE_TYPE") - -union TData { - __hip_float4_vector_value_type f; - __hip_int4_vector_value_type i; - __hip_uint4_vector_value_type u; -}; - -#define __TEXTURE_FUNCTIONS_DECL__ static inline __device__ - - -#if (__hcc_workweek__ >= 18114) || __clang__ -#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) -#else -#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(2))) -#endif - -#define TEXTURE_PARAMETERS_INIT \ - unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ - unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ - TData texel; -#define TEXTURE_REF_PARAMETERS_INIT \ - unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)texRef.textureObject; \ - unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ - TData texel; -#define TEXTURE_SET_FLOAT *retVal = texel.f.x; - -#define TEXTURE_SET_SIGNED *retVal = texel.i.x; - -#define TEXTURE_SET_UNSIGNED *retVal = texel.u.x; - -#define TEXTURE_SET_FLOAT_X retVal->x = texel.f.x; 
- -#define TEXTURE_SET_SIGNED_X retVal->x = texel.i.x; - -#define TEXTURE_SET_UNSIGNED_X retVal->x = texel.u.x; - -#define TEXTURE_SET_FLOAT_XY \ - retVal->x = texel.f.x; \ - retVal->y = texel.f.y; - -#define TEXTURE_SET_SIGNED_XY \ - retVal->x = texel.i.x; \ - retVal->y = texel.i.y; - -#define TEXTURE_SET_UNSIGNED_XY \ - retVal->x = texel.u.x; \ - retVal->y = texel.u.y; - -#define TEXTURE_SET_FLOAT_XYZW \ - retVal->x = texel.f.x; \ - retVal->y = texel.f.y; \ - retVal->z = texel.f.z; \ - retVal->w = texel.f.w; - -#define TEXTURE_SET_SIGNED_XYZW \ - retVal->x = texel.i.x; \ - retVal->y = texel.i.y; \ - retVal->z = texel.i.z; \ - retVal->w = texel.i.w; - -#define TEXTURE_SET_UNSIGNED_XYZW \ - retVal->x = texel.u.x; \ - retVal->y = texel.u.y; \ - retVal->z = texel.u.z; \ - retVal->w = texel.u.w; - -#define TEXTURE_RETURN_CHAR return texel.i.x; - -#define TEXTURE_RETURN_UCHAR return texel.u.x; - -#define TEXTURE_RETURN_SHORT return texel.i.x; - -#define TEXTURE_RETURN_USHORT return texel.u.x; - -#define TEXTURE_RETURN_INT return texel.i.x; - -#define TEXTURE_RETURN_UINT return texel.u.x; - -#define TEXTURE_RETURN_FLOAT return texel.f.x; - -#define TEXTURE_RETURN_SIGNED return texel.i.x; - -#define TEXTURE_RETURN_UNSIGNED return texel.u.x; - -#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x); - -#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x); - -#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x); - -#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x); - -#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x); - -#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x); - -#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x); - -#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y); - -#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y); - -#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y); - -#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, 
texel.u.y); - -#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y); - -#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y); - -#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y); - -#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); - -#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); - -#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); - -#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); - -#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); - -#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); - -#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w); - -extern "C" { - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_1D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - float c); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_1Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_2D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c); - - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_2Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c); - -__device__ -float __ockl_image_sample_2Dad( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c); - -__device__ -float __ockl_image_sample_2Dd( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - 
__hip_float2_vector_value_type c); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_3D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_grad_1D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - float c, float dx, float dy); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_grad_1Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, float dx, float dy); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_grad_2D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_grad_2Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); - -__device__ -float __ockl_image_sample_grad_2Dad( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); - -__device__ -float __ockl_image_sample_grad_2Dd( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_grad_3D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, __hip_float4_vector_value_type dx, __hip_float4_vector_value_type dy); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_lod_1D( - unsigned int 
ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - float c, float l); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_lod_1Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, float l); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_lod_2D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, float l); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_lod_2Da( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, float l); - -__device__ -float __ockl_image_sample_lod_2Dad( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, float l); - -__device__ -float __ockl_image_sample_lod_2Dd( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float2_vector_value_type c, float l); - -__device__ -__hip_float4_vector_value_type __ockl_image_sample_lod_3D( - unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s, - __hip_float4_vector_value_type c, float l); -} - -//////////////////////////////////////////////////////////// -// Texture object APIs -//////////////////////////////////////////////////////////// - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char1* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char2* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - 
TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char4* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned char* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar1* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar2* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar4* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short1* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short2* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short4* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned short* retVal, hipTextureObject_t textureObject, 
- int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort1* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort2* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort4* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int1* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int2* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int4* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned int* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint1* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void 
tex1Dfetch(uint2* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint4* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float* retVal, hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float1* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float2* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float4* retVal, hipTextureObject_t textureObject, - int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1Dfetch(hipTextureObject_t textureObject, int x) { - T ret; - tex1Dfetch(&ret, textureObject, x); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1D(char* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(char1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(char2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - 
TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(char4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned char* retVal, hipTextureObject_t textureObject, - float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(short* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(short1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(short2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(short4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned short* retVal, hipTextureObject_t textureObject, - float x) { - TEXTURE_PARAMETERS_INIT; - 
texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(int* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(int1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(int2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(int4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned int* retVal, hipTextureObject_t textureObject, - float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint2* retVal, hipTextureObject_t textureObject, float x) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(float* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(float1* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(float2* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1D(float4* retVal, hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_SET_FLOAT_XYZW; -} -template -__TEXTURE_FUNCTIONS_DECL__ T tex1D(hipTextureObject_t textureObject, float x) { - T ret; - tex1D(&ret, textureObject, x); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XY; 
-} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short4* retVal, hipTextureObject_t textureObject, float x, - float level) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void 
tex1DLod(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float1* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float2* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float4* retVal, hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) { - T ret; - tex1DLod(&ret, textureObject, x, level); - return ret; -} - 
-//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - 
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort1* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort2* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort4* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int* retVal, 
hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float1* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float2* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float4* retVal, hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dx, - float dy) { - T ret; - tex1DLod(&ret, textureObject, x, dx, dy); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex2D(char* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(char1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(char2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(char4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XYZW; -} - 
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(short* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(short1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(short2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(short4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(int* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(int1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(int2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(int4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint1* retVal, hipTextureObject_t 
textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(float* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(float1* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(float2* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2D(float4* retVal, hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex2D(hipTextureObject_t textureObject, float x, float y) { - T ret; - tex2D(&ret, textureObject, x, y); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED; 
-} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int* retVal, hipTextureObject_t textureObject, float x, - 
float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float* 
retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float1* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float2* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float4* retVal, hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, - float level) { - T ret; - tex2DLod(&ret, textureObject, x, y, level); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex3D(char* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(char1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(char2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void 
tex3D(char4* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar4* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(short* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(short1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(short2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(short4* 
retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort4* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(int* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(int1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(int2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(int4* retVal, 
hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint4* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(float* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(float1* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(float2* retVal, hipTextureObject_t textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3D(float4* retVal, hipTextureObject_t 
textureObject, float x, - float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) { - T ret; - tex3D(&ret, textureObject, x, y, z); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned char* retVal, hipTextureObject_t textureObject, - float x, float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, 
float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned short* retVal, hipTextureObject_t textureObject, - float x, float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - 
TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void 
tex3DLod(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint4* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float1* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float2* retVal, hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float4* retVal, hipTextureObject_t textureObject, float 
x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, - float level) { - T ret; - tex3DLod(&ret, textureObject, x, y, z, level); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned char* retVal, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar2* retVal, hipTextureObject_t textureObject, - 
float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned short* retVal, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int* retVal, hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned int* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float1* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float2* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_FLOAT_XY; -} -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float4* retVal, hipTextureObject_t textureObject, - float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) { - T ret; - tex1DLayered(&ret, textureObject, x, layer); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned char* retVal, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - 
TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned short* retVal, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void 
tex1DLayeredLod(int* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned int* retVal, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint4* retVal, hipTextureObject_t textureObject, - float x, int layer, float 
level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float1* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float2* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float4* retVal, hipTextureObject_t textureObject, - float x, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, - float level) { - T ret; - tex1DLayeredLod(&ret, textureObject, x, layer, level); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, 
float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned char* retVal, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, 
layer).data, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned short* retVal, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, 
dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned int* retVal, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - 
TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float1* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float2* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float4* retVal, hipTextureObject_t textureObject, - float x, int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, - float dx, float dy) { - T ret; - tex1DLayeredGrad(&ret, textureObject, x, layer, dx, dy); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED; -} - 
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned char* retVal, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned short* retVal, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - 
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int* retVal, hipTextureObject_t textureObject, float x, - float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned int* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - 
texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, - int layer) { - T ret; - tex2DLayered(&ret, textureObject, x, y, layer); - return ret; -} - -//////////////////////////////////////////////////////////// -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_X; -} - 
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned char* retVal, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, 
layer, 0.0f).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned short* retVal, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_SIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned int* retVal, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_UNSIGNED_XYZW; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_FLOAT; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float1* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_FLOAT_X; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float2* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_FLOAT_XY; -} - -__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float4* retVal, hipTextureObject_t textureObject, - float x, float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_SET_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - T ret; - tex2DLayeredLod(&ret, textureObject, x, y, layer, level); - return ret; -} - -//////////////////////////////////////////////////////////// -// Texture Reference APIs -//////////////////////////////////////////////////////////// -template 
-__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture texRef, - int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture texRef, - int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture texRef, - int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ 
uint1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture texRef, int x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XY; -} - 
-template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XYZW; -} - 
-template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture texRef, - hipTextureObject_t textureObject, int x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// -template -__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 
tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture texRef, - float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture texRef, - float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - 
TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// -template -__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_X; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ 
ushort4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture texRef, - hipTextureObject_t 
textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT; -} -////// -template -__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture texRef, float x) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture texRef, - hipTextureObject_t textureObject, float x) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1D(i, s, x); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_X; -} -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture texRef, - float x, float level) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture texRef, - float x, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture texRef, float x, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture texRef, - float x, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_XY; -} - 
-template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture texRef, float x, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_X; -} -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, 
level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture texRef, - hipTextureObject_t textureObject, float x, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_1D(i, s, x, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - 
-//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture texRef, - float x, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - 
texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture texRef, - float x, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture texRef, float x, float dx, - float dy) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture texRef, - float x, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture texRef, float x, float dx, - float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture texRef, float x, - float dx, float 
dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture texRef, float x, - float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 
tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - 
TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float dx, - float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; 
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture texRef, - float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture texRef, - float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture texRef, float x, - float y) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_XYZW; -} - - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - 
TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture texRef, - hipTextureObject_t textureObject, float x, - float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture texRef, - 
hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - 
TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture texRef, float x, float y) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture texRef, - hipTextureObject_t textureObject, float x, float y) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture texRef, - float x, float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture texRef, - float x, float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture texRef, float x, - float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture texRef, float x, - 
float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture texRef, float x, - float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture texRef, - float x, float y, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture texRef, float x, float y, - float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float 
x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT; -} 
- -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture texRef, - hipTextureObject_t textureObject, 
float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture texRef, 
float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture texRef, - float x, float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - 
TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture texRef, - float x, float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture texRef, float x, - float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture texRef, float x, - float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture texRef, float x, - float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture texRef, - float x, float y, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture texRef, float x, float y, - 
float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture texRef, float x, float y, - float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, 
s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - 
float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture texRef, - float x, float y, float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ 
uchar1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture texRef, - float x, float y, float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, 
z, 0.0f).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture texRef, float x, - float y, float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture texRef, float x, float y, - float z) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture texRef, float x, float y, - float z) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_X; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - 
texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture texRef, - 
hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture texRef, - float x, float y, float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture texRef, - float x, float y, 
float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture texRef, float x, float y, - float z, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_XYZW; -} - 
-//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, float z, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, - level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char 
tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture texRef, - float x, float y, float z, float4 dx, - float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_X; 
-} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, 
dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture texRef, - float x, float y, float z, float4 dx, - float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture texRef, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture texRef, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture texRef, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 
0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture texRef, - float x, float y, float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture texRef, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// -template -__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, 
float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - 
float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, 
float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - float z, float4 dx, float4 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, - float4(dx.x, 
dx.y, dx.z, dx.w).data, - float4(dy.x, dy.y, dy.z, dy.w).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture texRef, - float x, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered( - texture texRef, float x, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, 
layer).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture texRef, - float x, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = 
__ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture texRef, float x, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture texRef, - hipTextureObject_t 
textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered( - texture texRef, hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, 
float2(x, layer).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture texRef, 
float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod( - texture texRef, float x, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod( - texture texRef, float x, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, 
layer).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture texRef, - float x, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture texRef, float x, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, 
level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod( - texture texRef, hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, 
float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod( - texture texRef, hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, int layer, - float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad( - texture texRef, float x, int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad( - texture texRef, float x, int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, 
float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad( - texture texRef, float x, int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - 
TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - 
TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture texRef, float x, - int layer, float dx, float dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - int layer, float dx, float dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture texRef, float x, float y, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_X; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture texRef, - float x, float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 
tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered( - texture texRef, float x, float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered( - texture texRef, hipTextureObject_t textureObject, float x, - float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture texRef, float x, float y, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture texRef, - 
hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture texRef, float x, float y, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture texRef, float x, float y, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture texRef, float x, float y, - int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture texRef, - float x, float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT; 
-} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - 
texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture texRef, float x, - float y, int layer) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - 
-//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, 
y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod( - texture texRef, float x, float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod( - texture texRef, hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - 
TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float 
level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod( - texture texRef, float x, float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod( - texture texRef, hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture 
texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture texRef, float x, float y, - int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT; -} - -template 
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture texRef, - float x, float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT; -} - 
-template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 
0.0f).data, level); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture texRef, float x, - float y, int layer, float level) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, 
float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float level) { - TEXTURE_PARAMETERS_INIT; - texel.f = __ockl_image_sample_lod_2Da( - i, s, float4(x, y, layer, 0.0f).data, level); - TEXTURE_RETURN_FLOAT_XYZW; -} - -//////////////////////////////////////////////////////////// - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - 
__ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_CHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad( - texture texRef, float x, float y, int layer, float2 dx, - float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture texRef, 
float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - 
TEXTURE_RETURN_UCHAR_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, 
float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_SHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad( - texture texRef, float x, float y, int layer, float2 dx, - float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, 
int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_USHORT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int 
tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - 
TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_INT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad( - texture texRef, float x, float y, int layer, float2 dx, - float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad( - texture texRef, hipTextureObject_t textureObject, float x, float y, - int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, 
float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_UINT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - 
TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_X; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XY; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture texRef, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_REF_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} - -template -__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture texRef, - hipTextureObject_t textureObject, float x, - float y, int layer, float2 dx, float2 dy) { - TEXTURE_PARAMETERS_INIT; - texel.f = - __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, - float2(dx.x, dx.y).data, - float2(dy.x, dy.y).data); - TEXTURE_RETURN_FLOAT_XYZW; -} -#endif diff --git 
a/src/utils/amd_hip/hip/hcc_detail/texture_types.h b/src/utils/amd_hip/hip/hcc_detail/texture_types.h deleted file mode 100644 index bc334de24..000000000 --- a/src/utils/amd_hip/hip/hcc_detail/texture_types.h +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H - -#include - -#define hipTextureType1D 0x01 -#define hipTextureType2D 0x02 -#define hipTextureType3D 0x03 -#define hipTextureTypeCubemap 0x0C -#define hipTextureType1DLayered 0xF1 -#define hipTextureType2DLayered 0xF2 -#define hipTextureTypeCubemapLayered 0xFC - -/** - * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD - */ -#define HIP_IMAGE_OBJECT_SIZE_DWORD 12 -#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 -#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD -#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) - -/** - * An opaque value that represents a hip texture object - */ -typedef unsigned long long hipTextureObject_t; - -/** - * hip texture address modes - */ -enum hipTextureAddressMode { - hipAddressModeWrap = 0, - hipAddressModeClamp = 1, - hipAddressModeMirror = 2, - hipAddressModeBorder = 3 -}; - -/** - * hip texture filter modes - */ -enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 }; - -/** - * hip texture read modes - */ -enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 }; - -/** - * hip texture reference - */ -typedef struct textureReference { - int normalized; - enum hipTextureFilterMode filterMode; - enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions - struct hipChannelFormatDesc channelDesc; - int sRGB; // Perform sRGB->linear conversion during texture read - unsigned int maxAnisotropy; // Limit to the anisotropy ratio - enum hipTextureFilterMode mipmapFilterMode; - float mipmapLevelBias; - float minMipmapLevelClamp; - float maxMipmapLevelClamp; - - hipTextureObject_t textureObject; - int numChannels; - enum hipArray_Format format; -}textureReference; - -/** - * hip texture descriptor - */ -typedef struct hipTextureDesc { - enum 
hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions - enum hipTextureFilterMode filterMode; - enum hipTextureReadMode readMode; - int sRGB; // Perform sRGB->linear conversion during texture read - float borderColor[4]; - int normalizedCoords; - unsigned int maxAnisotropy; - enum hipTextureFilterMode mipmapFilterMode; - float mipmapLevelBias; - float minMipmapLevelClamp; - float maxMipmapLevelClamp; -}hipTextureDesc; - -#endif diff --git a/src/utils/amd_hip/hip/hip_common.h b/src/utils/amd_hip/hip/hip_common.h deleted file mode 100644 index 71285fc8d..000000000 --- a/src/utils/amd_hip/hip/hip_common.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H -#define HIP_INCLUDE_HIP_HIP_COMMON_H - -// Common code included at start of every hip file. 
-// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC -// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly -#if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) -#define __HIP_PLATFORM_HCC__ -#endif //__HCC__ - -// Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC -#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) -#define __HIP_PLATFORM_NVCC__ -#ifdef __CUDACC__ -#define __HIPCC__ -#endif - -#endif //__NVCC__ - -// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path -#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ - (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) -#define __HIP_DEVICE_COMPILE__ 1 -#endif - -#if __HIP_DEVICE_COMPILE__ == 0 -// 32-bit Atomics -#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) -#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) -#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) -#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) - -// 64-bit Atomics -#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) -#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) - -// Doubles -#define __HIP_ARCH_HAS_DOUBLES__ (0) - -// Warp cross-lane operations -#define __HIP_ARCH_HAS_WARP_VOTE__ (0) -#define __HIP_ARCH_HAS_WARP_BALLOT__ (0) -#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) -#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) - -// Sync -#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) -#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) - -// Misc -#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) -#define __HIP_ARCH_HAS_3DGRID__ (0) -#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hip_complex.h b/src/utils/amd_hip/hip/hip_complex.h deleted file mode 100644 index fb9cad5e4..000000000 --- a/src/utils/amd_hip/hip/hip_complex.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H -#define HIP_INCLUDE_HIP_HIP_COMPLEX_H - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hip_fp16.h b/src/utils/amd_hip/hip/hip_fp16.h deleted file mode 100644 index 994ce62bd..000000000 --- a/src/utils/amd_hip/hip/hip_fp16.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HIP_FP16_H -#define HIP_INCLUDE_HIP_HIP_FP16_H - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include "cuda_fp16.h" -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hip_hcc.h b/src/utils/amd_hip/hip/hip_hcc.h deleted file mode 100644 index c07a57fb3..000000000 --- a/src/utils/amd_hip/hip/hip_hcc.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HIP_HCC_H -#define HIP_INCLUDE_HIP_HIP_HCC_H - -#ifdef __HCC__ - -#include "hip/hip_runtime_api.h" - -// Forward declarations: -namespace hc { -class accelerator; -class accelerator_view; -}; // namespace hc - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup HCC-specific features - * @warning These APIs provide access to special features of HCC compiler and are not available - *through the CUDA path. 
- * @{ - */ - - -/** - * @brief Return hc::accelerator associated with the specified deviceId - * @return #hipSuccess, #hipErrorInvalidDevice - */ -hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc); - -/** - * @brief Return hc::accelerator_view associated with the specified stream - * - * If stream is 0, the accelerator_view for the default stream is returned. - * @return #hipSuccess - */ -hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av); - - -/** - * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed - to kernelparams or extra - * - * @param [in[ f Kernel to launch. - * @param [in] gridDimX X grid dimension specified in work-items - * @param [in] gridDimY Y grid dimension specified in work-items - * @param [in] gridDimZ Z grid dimension specified in work-items - * @param [in] blockDimX X block dimensions specified in work-items - * @param [in] blockDimY Y grid dimension specified in work-items - * @param [in] blockDimZ Z grid dimension specified in work-items - * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The - kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th - default stream is used with associated synchronization rules. - * @param [in] kernelParams - * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and - must be in the memory layout and alignment expected by the kernel. - * @param [in] startEvent If non-null, specified event will be updated to track the start time of - the kernel launch. The event must be created before calling this API. - * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of - the kernel launch. The event must be created before calling this API. 
- * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please - refer to hip_porting_driver_api.md for sample usage. - - * HIP/ROCm actually updates the start event when the associated kernel completes. - */ -hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, - uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t localWorkSizeX, uint32_t localWorkSizeY, - uint32_t localWorkSizeZ, size_t sharedMemBytes, - hipStream_t hStream, void** kernelParams, void** extra, - hipEvent_t startEvent = nullptr, - hipEvent_t stopEvent = nullptr); - -// doxygen end HCC-specific features -/** - * @} - */ -#endif // #ifdef __HCC__ -#endif // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H diff --git a/src/utils/amd_hip/hip/hip_profile.h b/src/utils/amd_hip/hip/hip_profile.h deleted file mode 100644 index 747483925..000000000 --- a/src/utils/amd_hip/hip/hip_profile.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H -#define HIP_INCLUDE_HIP_HIP_PROFILE_H - -#if not defined(ENABLE_HIP_PROFILE) -#define ENABLE_HIP_PROFILE 1 -#endif - -#if defined(__HIP_PLATFORM_HCC__) and (ENABLE_HIP_PROFILE == 1) -#include -#define HIP_SCOPED_MARKER(markerName, group) \ - amdtScopedMarker __scopedMarker(markerName, group, nullptr); -#define HIP_BEGIN_MARKER(markerName, group) amdtBeginMarker(markerName, group, nullptr); -#define HIP_END_MARKER() amdtEndMarker(); -#else -#define HIP_SCOPED_MARKER(markerName, group) -#define HIP_BEGIN_MARKER(markerName, group) -#define HIP_END_MARKER() -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hip_runtime.h b/src/utils/amd_hip/hip/hip_runtime.h deleted file mode 100644 index 937ba61ec..000000000 --- a/src/utils/amd_hip/hip/hip_runtime.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//! HIP = Heterogeneous-compute Interface for Portability -//! -//! Define a extremely thin runtime layer that allows source code to be compiled unmodified -//! through either AMD HCC or NVCC. Key features tend to be in the spirit -//! and terminology of CUDA, but with a portable path to other accelerators as well: -// -//! Both paths support rich C++ features including classes, templates, lambdas, etc. -//! Runtime API is C -//! Memory management is based on pure pointers and resembles malloc/free/copy. -// -//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch -//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ -//! features. - -#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H -#define HIP_INCLUDE_HIP_HIP_RUNTIME_H - -// Some standard header files, these are included by hc.hpp and so want to make them avail on both -// paths to provide a consistent include env and avoid "missing symbol" errors that only appears -// on NVCC path: -#include -#include -#include -#include - -#if __cplusplus > 199711L -#include -#endif - - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - - -#include -#include - -#endif \ No newline at end of file diff --git a/src/utils/amd_hip/hip/hip_runtime_api.h b/src/utils/amd_hip/hip/hip_runtime_api.h deleted file mode 100644 index 34363689e..000000000 --- a/src/utils/amd_hip/hip/hip_runtime_api.h +++ /dev/null @@ -1,342 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, 
Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * @file hip_runtime_api.h - * - * @brief Defines the API signatures for HIP runtime. - * This file can be compiled with a standard compiler. - */ - -#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H -#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H - - -#include // for getDeviceProp -#include - -enum { - HIP_SUCCESS = 0, - HIP_ERROR_INVALID_VALUE, - HIP_ERROR_NOT_INITIALIZED, - HIP_ERROR_LAUNCH_OUT_OF_RESOURCES -}; - -typedef struct { - // 32-bit Atomics - unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. - unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. - unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. - unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. - unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. 
- - // 64-bit Atomics - unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. - unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. - - // Doubles - unsigned hasDoubles : 1; ///< Double-precision floating point. - - // Warp cross-lane operations - unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). - unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). - unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). - unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. - - // Sync - unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. - unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. - - // Misc - unsigned hasSurfaceFuncs : 1; ///< Surface functions. - unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). - unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. -} hipDeviceArch_t; - - -//--- -// Common headers for both NVCC and HCC paths: - -/** - * hipDeviceProp - * - */ -typedef struct hipDeviceProp_t { - char name[256]; ///< Device name. - size_t totalGlobalMem; ///< Size of global memory region (in bytes). - size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). - int regsPerBlock; ///< Registers per block. - int warpSize; ///< Warp size. - int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. - int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. - int maxGridSize[3]; ///< Max grid dimensions (XYZ). - int clockRate; ///< Max clock frequency of the multiProcessors in khz. - int memoryClockRate; ///< Max global memory clock frequency in khz. - int memoryBusWidth; ///< Global memory bus width in bits. - size_t totalConstMem; ///< Size of shared memory region (in bytes). - int major; ///< Major compute capability. 
On HCC, this is an approximation and features may - ///< differ from CUDA CC. See the arch feature flags for portable ways to query - ///< feature caps. - int minor; ///< Minor compute capability. On HCC, this is an approximation and features may - ///< differ from CUDA CC. See the arch feature flags for portable ways to query - ///< feature caps. - int multiProcessorCount; ///< Number of multi-processors (compute units). - int l2CacheSize; ///< L2 cache size. - int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. - int computeMode; ///< Compute mode. - int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" - ///< instructions. New for HIP. - hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. - int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. - int pciDomainID; ///< PCI Domain ID - int pciBusID; ///< PCI Bus ID. - int pciDeviceID; ///< PCI Device ID. - size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. - int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. - int canMapHostMemory; ///< Check whether HIP can map host memory - int gcnArch; ///< AMD GCN Arch Value. Eg: 803, 701 - int integrated; ///< APU vs dGPU -} hipDeviceProp_t; - - -/** - * Memory type (for pointer attributes) - */ -typedef enum hipMemoryType { - hipMemoryTypeHost, ///< Memory is physically located on host - hipMemoryTypeDevice, ///< Memory is physically located on device. (see deviceId for specific - ///< device) - hipMemoryTypeArray, ///< Array memory, physically located on device. 
(see deviceId for specific - ///< device) - hipMemoryTypeUnified ///< Not used currently -}hipMemoryType; - - -/** - * Pointer attributes - */ -typedef struct hipPointerAttribute_t { - enum hipMemoryType memoryType; - int device; - void* devicePointer; - void* hostPointer; - int isManaged; - unsigned allocationFlags; /* flags specified when memory was allocated*/ - /* peers? */ -} hipPointerAttribute_t; - - -// hack to get these to show up in Doxygen: -/** - * @defgroup GlobalDefs Global enum and defines - * @{ - * - */ - -// Ignoring error-code return values from hip APIs is discouraged. On C++17, -// we can make that yield a warning -#if __cplusplus >= 201703L -#define __HIP_NODISCARD [[nodiscard]] -#else -#define __HIP_NODISCARD -#endif - -/* - * @brief hipError_t - * @enum - * @ingroup Enumerations - */ -// Developer note - when updating these, update the hipErrorName and hipErrorString functions in -// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path. - -typedef enum __HIP_NODISCARD hipError_t { - hipSuccess = 0, ///< Successful completion. - hipErrorOutOfMemory = 2, - hipErrorNotInitialized = 3, - hipErrorDeinitialized = 4, - hipErrorProfilerDisabled = 5, - hipErrorProfilerNotInitialized = 6, - hipErrorProfilerAlreadyStarted = 7, - hipErrorProfilerAlreadyStopped = 8, - hipErrorInsufficientDriver = 35, - hipErrorInvalidImage = 200, - hipErrorInvalidContext = 201, ///< Produced when input context is invalid. 
- hipErrorContextAlreadyCurrent = 202, - hipErrorMapFailed = 205, - hipErrorUnmapFailed = 206, - hipErrorArrayIsMapped = 207, - hipErrorAlreadyMapped = 208, - hipErrorNoBinaryForGpu = 209, - hipErrorAlreadyAcquired = 210, - hipErrorNotMapped = 211, - hipErrorNotMappedAsArray = 212, - hipErrorNotMappedAsPointer = 213, - hipErrorECCNotCorrectable = 214, - hipErrorUnsupportedLimit = 215, - hipErrorContextAlreadyInUse = 216, - hipErrorPeerAccessUnsupported = 217, - hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX - hipErrorInvalidGraphicsContext = 219, - hipErrorInvalidSource = 300, - hipErrorFileNotFound = 301, - hipErrorSharedObjectSymbolNotFound = 302, - hipErrorSharedObjectInitFailed = 303, - hipErrorOperatingSystem = 304, - hipErrorSetOnActiveProcess = 305, - hipErrorInvalidHandle = 400, - hipErrorNotFound = 500, - hipErrorIllegalAddress = 700, - hipErrorInvalidSymbol = 701, - // Runtime Error Codes start here. - hipErrorMissingConfiguration = 1001, - hipErrorMemoryAllocation = 1002, ///< Memory allocation error. - hipErrorInitializationError = 1003, ///< TODO comment from hipErrorInitializationError - hipErrorLaunchFailure = - 1004, ///< An exception occurred on the device while executing a kernel. - hipErrorPriorLaunchFailure = 1005, - hipErrorLaunchTimeOut = 1006, - hipErrorLaunchOutOfResources = 1007, ///< Out of resources error. - hipErrorInvalidDeviceFunction = 1008, - hipErrorInvalidConfiguration = 1009, - hipErrorInvalidDevice = 1010, ///< DeviceID must be in range 0...#compute-devices. - hipErrorInvalidValue = 1011, ///< One or more of the parameters passed to the API call is NULL - ///< or not in an acceptable range. - hipErrorInvalidDevicePointer = 1017, ///< Invalid Device Pointer - hipErrorInvalidMemcpyDirection = 1021, ///< Invalid memory copy direction - hipErrorUnknown = 1030, ///< Unknown error. - hipErrorInvalidResourceHandle = 1033, ///< Resource handle (hipEvent_t or hipStream_t) invalid. 
- hipErrorNotReady = 1034, ///< Indicates that asynchronous operations enqueued earlier are not - ///< ready. This is not actually an error, but is used to distinguish - ///< from hipSuccess (which indicates completion). APIs that return - ///< this error include hipEventQuery and hipStreamQuery. - hipErrorNoDevice = 1038, ///< Call to hipGetDeviceCount returned 0 devices - hipErrorPeerAccessAlreadyEnabled = - 1050, ///< Peer access was already enabled from the current device. - - hipErrorPeerAccessNotEnabled = - 1051, ///< Peer access was never enabled from the current device. - hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen - ///< in production systems. - hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically - ///< not seen in production systems. - hipErrorHostMemoryAlreadyRegistered = - 1061, ///< Produced when trying to lock a page-locked memory. - hipErrorHostMemoryNotRegistered = - 1062, ///< Produced when trying to unlock a non-page-locked memory. - hipErrorMapBufferObjectFailed = - 1071, ///< Produced when the IPC memory attach failed from ROCr. - hipErrorAssert = - 1081, ///< Produced when the kernel calls assert. - hipErrorTbd ///< Marker that more error codes are needed. -} hipError_t; - -#undef __HIP_NODISCARD - -/* - * @brief hipDeviceAttribute_t - * @enum - * @ingroup Enumerations - */ -typedef enum hipDeviceAttribute_t { - hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. - hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. - hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. - hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. - hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. - hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. - hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. 
- hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in - ///< bytes. - hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. - hipDeviceAttributeWarpSize, ///< Warp size in threads. - hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a - ///< thread block. This number is shared by all thread - ///< blocks simultaneously resident on a - ///< multiprocessor. - hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. - hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. - hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. - hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. - hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. - hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 - ///< cache. - hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per - ///< multiprocessor. - hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. - hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. - hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels - ///< concurrently. - hipDeviceAttributePciBusId, ///< PCI Bus ID. - hipDeviceAttributePciDeviceId, ///< PCI Device ID. - hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per - ///< Multiprocessor. - hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. 
- hipDeviceAttributeIntegrated, ///< iGPU -} hipDeviceAttribute_t; - -enum hipComputeMode { - hipComputeModeDefault = 0, - hipComputeModeExclusive = 1, - hipComputeModeProhibited = 2, - hipComputeModeExclusiveProcess = 3 -}; - -/** - * @} - */ - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include "hip/hcc_detail/hip_runtime_api.h" -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include "hip/nvcc_detail/hip_runtime_api.h" -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - - -/** - * @brief: C++ wrapper for hipMalloc - * - * Perform automatic type conversion to eliminate need for excessive typecasting (ie void**) - * - * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these - * wrappers. It is useful for applications which need to obtain decltypes of - * HIP runtime APIs. - * - * @see hipMalloc - */ -#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__) -template -static inline hipError_t hipMalloc(T** devPtr, size_t size) { - return hipMalloc((void**)devPtr, size); -} - -// Provide an override to automatically typecast the pointer type from void**, and also provide a -// default for the flags. -template -static inline hipError_t hipHostMalloc(T** ptr, size_t size, - unsigned int flags = hipHostMallocDefault) { - return hipHostMalloc((void**)ptr, size, flags); -} -#endif - -#endif diff --git a/src/utils/amd_hip/hip/hip_texture_types.h b/src/utils/amd_hip/hip/hip_texture_types.h deleted file mode 100644 index a7feab011..000000000 --- a/src/utils/amd_hip/hip/hip_texture_types.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - - -#endif diff --git a/src/utils/amd_hip/hip/hip_vector_types.h b/src/utils/amd_hip/hip/hip_vector_types.h deleted file mode 100644 index c1a0373c0..000000000 --- a/src/utils/amd_hip/hip/hip_vector_types.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//! hip_vector_types.h : Defines the HIP vector types. - -#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H -#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H - -#include - - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#if __cplusplus -#include -#endif -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/math_functions.h b/src/utils/amd_hip/hip/math_functions.h deleted file mode 100644 index 2dfec4551..000000000 --- a/src/utils/amd_hip/hip/math_functions.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H -#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H - -// Some standard header files, these are included by hc.hpp and so want to make them avail on both -// paths to provide a consistent include env and avoid "missing symbol" errors that only appears -// on NVCC path: - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -//#include -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h b/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h deleted file mode 100644 index c3e9dc1ff..000000000 --- a/src/utils/amd_hip/hip/nvcc_detail/channel_descriptor.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H - -#include "channel_descriptor.h" - -#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h b/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h deleted file mode 100644 index d0e45d26d..000000000 --- a/src/utils/amd_hip/hip/nvcc_detail/hip_complex.h +++ /dev/null @@ -1,119 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H - -#include "cuComplex.h" - -typedef cuFloatComplex hipFloatComplex; - -__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); } - -__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); } - -__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { - return make_cuFloatComplex(a, b); -} - -__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); } - -__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { - return cuCabsf(z) * cuCabsf(z); -} - -__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { - return cuCaddf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { - return cuCsubf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { - return cuCmulf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { - return cuCdivf(p, q); -} - -__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); } - -typedef cuDoubleComplex hipDoubleComplex; - -__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); } - -__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); } - -__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { - return make_cuDoubleComplex(a, b); -} - -__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); } - -__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { - return cuCabs(z) * cuCabs(z); -} - -__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, 
hipDoubleComplex q) { - return cuCadd(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { - return cuCsub(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { - return cuCmul(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { - return cuCdiv(p, q); -} - -__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); } - -typedef cuFloatComplex hipComplex; - -__device__ __host__ static inline hipComplex make_Complex(float x, float y) { - return make_cuComplex(x, y); -} - -__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { - return cuComplexDoubleToFloat(z); -} - -__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { - return cuComplexFloatToDouble(z); -} - -__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { - return cuCfmaf(p, q, r); -} - -__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, - hipDoubleComplex r) { - return cuCfma(p, q, r); -} - -#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h deleted file mode 100644 index 19d740a1e..000000000 --- a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H - -#include - -#include - -#define HIP_KERNEL_NAME(...) __VA_ARGS__ - -typedef int hipLaunchParm; - -#define hipLaunchKernel(kernelName, numblocks, numthreads, memperblock, streamId, ...) \ - do { \ - kernelName<<>>(0, ##__VA_ARGS__); \ - } while (0) - -#define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) 
\ - do { \ - kernelName<<>>(__VA_ARGS__); \ - } while (0) - -#define hipReadModeElementType cudaReadModeElementType - -#ifdef __CUDA_ARCH__ - - -// 32-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110) -#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110) -#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200) - -// 64-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120) - -// Doubles -#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120) - -// warp cross-lane operations: -#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300) -#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350) - -// sync -#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200) - -// misc -#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350) - -#endif - -#ifdef __CUDACC__ - - -#define hipThreadIdx_x threadIdx.x -#define hipThreadIdx_y threadIdx.y -#define hipThreadIdx_z threadIdx.z - -#define hipBlockIdx_x blockIdx.x -#define hipBlockIdx_y blockIdx.y -#define hipBlockIdx_z blockIdx.z - -#define hipBlockDim_x blockDim.x -#define hipBlockDim_y blockDim.y -#define hipBlockDim_z blockDim.z - -#define hipGridDim_x gridDim.x -#define hipGridDim_y gridDim.y -#define hipGridDim_z gridDim.z - -#define HIP_SYMBOL(X) X - -/** - * extern __shared__ - */ - -#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; - -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - 
-#ifdef __HIP_DEVICE_COMPILE__ -#define abort() \ - { asm("trap;"); } -#undef assert -#define assert(COND) \ - { \ - if (!COND) { \ - abort(); \ - } \ - } -#endif - -#define __clock() clock() -#define __clock64() clock64() - -#endif - -#endif diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h b/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h deleted file mode 100644 index 02c4b7ee6..000000000 --- a/src/utils/amd_hip/hip/nvcc_detail/hip_runtime_api.h +++ /dev/null @@ -1,1286 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -#define __dparm(x) = x -#else -#define __dparm(x) -#endif - -// TODO -move to include/hip_runtime_api.h as a common implementation. 
-/** - * Memory copy types - * - */ -typedef enum hipMemcpyKind { - hipMemcpyHostToHost, - hipMemcpyHostToDevice, - hipMemcpyDeviceToHost, - hipMemcpyDeviceToDevice, - hipMemcpyDefault -} hipMemcpyKind; - -// hipTextureAddressMode -#define hipTextureAddressMode cudaTextureAddressMode -#define hipAddressModeWrap cudaAddressModeWrap -#define hipAddressModeClamp cudaAddressModeClamp -#define hipAddressModeMirror cudaAddressModeMirror -#define hipAddressModeBorder cudaAddressModeBorder - -// hipTextureFilterMode -#define hipTextureFilterMode cudaTextureFilterMode -#define hipFilterModePoint cudaFilterModePoint -#define hipFilterModeLinear cudaFilterModeLinear - -// hipTextureReadMode -#define hipTextureReadMode cudaTextureReadMode -#define hipReadModeElementType cudaReadModeElementType -#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat - -typedef enum hipChannelFormatKind { - hipChannelFormatKindSigned = 0, - hipChannelFormatKindUnsigned = 1, - hipChannelFormatKindFloat = 2, - hipChannelFormatKindNone = 3 -} hipChannelFormatKind; - -#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode -#define hipBoundaryModeZero cudaBoundaryModeZero -#define hipBoundaryModeTrap cudaBoundaryModeTrap -#define hipBoundaryModeClamp cudaBoundaryModeClamp - -// hipResourceType -#define hipResourceType cudaResourceType -#define hipResourceTypeArray cudaResourceTypeArray -#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray -#define hipResourceTypeLinear cudaResourceTypeLinear -#define hipResourceTypePitch2D cudaResourceTypePitch2D -// -// hipErrorNoDevice. - - -//! 
Flags that can be used with hipEventCreateWithFlags: -#define hipEventDefault cudaEventDefault -#define hipEventBlockingSync cudaEventBlockingSync -#define hipEventDisableTiming cudaEventDisableTiming -#define hipEventInterprocess cudaEventInterprocess -#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */ -#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */ - - -#define hipHostMallocDefault cudaHostAllocDefault -#define hipHostMallocPortable cudaHostAllocPortable -#define hipHostMallocMapped cudaHostAllocMapped -#define hipHostMallocWriteCombined cudaHostAllocWriteCombined -#define hipHostMallocCoherent 0x0 -#define hipHostMallocNonCoherent 0x0 - -#define hipHostRegisterDefault cudaHostRegisterDefault -#define hipHostRegisterPortable cudaHostRegisterPortable -#define hipHostRegisterMapped cudaHostRegisterMapped -#define hipHostRegisterIoMemory cudaHostRegisterIoMemory - -#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER -#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE -#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END -#define hipLimitMallocHeapSize cudaLimitMallocHeapSize -#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess - -// enum CUjit_option redefines -#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS -#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK -#define hipJitOptionWallTime CU_JIT_WALL_TIME -#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER -#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES -#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER -#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES -#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL -#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT -#define hipJitOptionTarget CU_JIT_TARGET -#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY -#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO 
-#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE -#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO -#define hipJitOptionCacheMode CU_JIT_CACHE_MODE -#define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT -#define hipJitOptionFastCompile CU_JIT_FAST_COMPILE -#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS - -typedef cudaEvent_t hipEvent_t; -typedef cudaStream_t hipStream_t; -typedef cudaIpcEventHandle_t hipIpcEventHandle_t; -typedef cudaIpcMemHandle_t hipIpcMemHandle_t; -typedef enum cudaLimit hipLimit_t; -typedef enum cudaFuncCache hipFuncCache_t; -typedef CUcontext hipCtx_t; -typedef cudaSharedMemConfig hipSharedMemConfig; -typedef CUfunc_cache hipFuncCache; -typedef CUjit_option hipJitOption; -typedef CUdevice hipDevice_t; -typedef CUmodule hipModule_t; -typedef CUfunction hipFunction_t; -typedef CUdeviceptr hipDeviceptr_t; -typedef struct cudaArray hipArray; -typedef struct cudaArray* hipArray_const_t; -typedef cudaFuncAttributes hipFuncAttributes; -#define hipMemcpy3DParms cudaMemcpy3DParms -#define hipArrayDefault cudaArrayDefault -#define hipArrayLayered cudaArrayLayered -#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore -#define hipArrayCubemap cudaArrayCubemap -#define hipArrayTextureGather cudaArrayTextureGather - -typedef cudaTextureObject_t hipTextureObject_t; -typedef cudaSurfaceObject_t hipSurfaceObject_t; -#define hipTextureType1D cudaTextureType1D -#define hipTextureType1DLayered cudaTextureType1DLayered -#define hipTextureType2D cudaTextureType2D -#define hipTextureType2DLayered cudaTextureType2DLayered -#define hipTextureType3D cudaTextureType3D -#define hipDeviceMapHost cudaDeviceMapHost - -#define hipExtent cudaExtent -#define hipPitchedPtr cudaPitchedPtr -#define make_hipExtent make_cudaExtent -#define make_hipPos make_cudaPos -#define make_hipPitchedPtr make_cudaPitchedPtr -// Flags that can be used with hipStreamCreateWithFlags -#define hipStreamDefault cudaStreamDefault -#define hipStreamNonBlocking cudaStreamNonBlocking - 
-#define hipChannelFormatDesc cudaChannelFormatDesc -#define hipResourceDesc cudaResourceDesc -#define hipTextureDesc cudaTextureDesc -#define hipResourceViewDesc cudaResourceViewDesc -// adding code for hipmemSharedConfig -#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault -#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte -#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte - -inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { - switch (cuError) { - case cudaSuccess: - return hipSuccess; - case cudaErrorMemoryAllocation: - return hipErrorMemoryAllocation; - case cudaErrorLaunchOutOfResources: - return hipErrorLaunchOutOfResources; - case cudaErrorInvalidValue: - return hipErrorInvalidValue; - case cudaErrorInvalidResourceHandle: - return hipErrorInvalidResourceHandle; - case cudaErrorInvalidDevice: - return hipErrorInvalidDevice; - case cudaErrorInvalidMemcpyDirection: - return hipErrorInvalidMemcpyDirection; - case cudaErrorInvalidDevicePointer: - return hipErrorInvalidDevicePointer; - case cudaErrorInitializationError: - return hipErrorInitializationError; - case cudaErrorNoDevice: - return hipErrorNoDevice; - case cudaErrorNotReady: - return hipErrorNotReady; - case cudaErrorUnknown: - return hipErrorUnknown; - case cudaErrorPeerAccessNotEnabled: - return hipErrorPeerAccessNotEnabled; - case cudaErrorPeerAccessAlreadyEnabled: - return hipErrorPeerAccessAlreadyEnabled; - case cudaErrorHostMemoryAlreadyRegistered: - return hipErrorHostMemoryAlreadyRegistered; - case cudaErrorHostMemoryNotRegistered: - return hipErrorHostMemoryNotRegistered; - case cudaErrorUnsupportedLimit: - return hipErrorUnsupportedLimit; - default: - return hipErrorUnknown; // Note - translated error. 
- } -} - -inline static hipError_t hipCUResultTohipError(CUresult cuError) { // TODO Populate further - switch (cuError) { - case CUDA_SUCCESS: - return hipSuccess; - case CUDA_ERROR_OUT_OF_MEMORY: - return hipErrorMemoryAllocation; - case CUDA_ERROR_INVALID_VALUE: - return hipErrorInvalidValue; - case CUDA_ERROR_INVALID_DEVICE: - return hipErrorInvalidDevice; - case CUDA_ERROR_DEINITIALIZED: - return hipErrorDeinitialized; - case CUDA_ERROR_NO_DEVICE: - return hipErrorNoDevice; - case CUDA_ERROR_INVALID_CONTEXT: - return hipErrorInvalidContext; - case CUDA_ERROR_NOT_INITIALIZED: - return hipErrorNotInitialized; - default: - return hipErrorUnknown; // Note - translated error. - } -} - -// TODO match the error enum names of hip and cuda -inline static cudaError_t hipErrorToCudaError(hipError_t hError) { - switch (hError) { - case hipSuccess: - return cudaSuccess; - case hipErrorMemoryAllocation: - return cudaErrorMemoryAllocation; - case hipErrorLaunchOutOfResources: - return cudaErrorLaunchOutOfResources; - case hipErrorInvalidValue: - return cudaErrorInvalidValue; - case hipErrorInvalidResourceHandle: - return cudaErrorInvalidResourceHandle; - case hipErrorInvalidDevice: - return cudaErrorInvalidDevice; - case hipErrorInvalidMemcpyDirection: - return cudaErrorInvalidMemcpyDirection; - case hipErrorInvalidDevicePointer: - return cudaErrorInvalidDevicePointer; - case hipErrorInitializationError: - return cudaErrorInitializationError; - case hipErrorNoDevice: - return cudaErrorNoDevice; - case hipErrorNotReady: - return cudaErrorNotReady; - case hipErrorUnknown: - return cudaErrorUnknown; - case hipErrorPeerAccessNotEnabled: - return cudaErrorPeerAccessNotEnabled; - case hipErrorPeerAccessAlreadyEnabled: - return cudaErrorPeerAccessAlreadyEnabled; - case hipErrorRuntimeMemory: - return cudaErrorUnknown; // Does not exist in CUDA - case hipErrorRuntimeOther: - return cudaErrorUnknown; // Does not exist in CUDA - case hipErrorHostMemoryAlreadyRegistered: - return 
cudaErrorHostMemoryAlreadyRegistered; - case hipErrorHostMemoryNotRegistered: - return cudaErrorHostMemoryNotRegistered; - case hipErrorTbd: - return cudaErrorUnknown; // Note - translated error. - default: - return cudaErrorUnknown; // Note - translated error. - } -} - -inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { - switch (kind) { - case hipMemcpyHostToHost: - return cudaMemcpyHostToHost; - case hipMemcpyHostToDevice: - return cudaMemcpyHostToDevice; - case hipMemcpyDeviceToHost: - return cudaMemcpyDeviceToHost; - case hipMemcpyDeviceToDevice: - return cudaMemcpyDeviceToDevice; - default: - return cudaMemcpyDefault; - } -} - -inline static cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode( - hipTextureAddressMode kind) { - switch (kind) { - case hipAddressModeWrap: - return cudaAddressModeWrap; - case hipAddressModeClamp: - return cudaAddressModeClamp; - case hipAddressModeMirror: - return cudaAddressModeMirror; - case hipAddressModeBorder: - return cudaAddressModeBorder; - default: - return cudaAddressModeWrap; - } -} - -inline static cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode( - hipTextureFilterMode kind) { - switch (kind) { - case hipFilterModePoint: - return cudaFilterModePoint; - case hipFilterModeLinear: - return cudaFilterModeLinear; - default: - return cudaFilterModePoint; - } -} - -inline static cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) { - switch (kind) { - case hipReadModeElementType: - return cudaReadModeElementType; - case hipReadModeNormalizedFloat: - return cudaReadModeNormalizedFloat; - default: - return cudaReadModeElementType; - } -} - -inline static cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind( - hipChannelFormatKind kind) { - switch (kind) { - case hipChannelFormatKindSigned: - return cudaChannelFormatKindSigned; - case hipChannelFormatKindUnsigned: - return cudaChannelFormatKindUnsigned; - case 
hipChannelFormatKindFloat: - return cudaChannelFormatKindFloat; - case hipChannelFormatKindNone: - return cudaChannelFormatKindNone; - default: - return cudaChannelFormatKindNone; - } -} - -/** - * Stream CallBack struct - */ -#define HIPRT_CB CUDART_CB -typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); -inline static hipError_t hipInit(unsigned int flags) { - return hipCUResultTohipError(cuInit(flags)); -} - -inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); } - -inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); } - -inline static hipError_t hipPeekAtLastError() { - return hipCUDAErrorTohipError(cudaPeekAtLastError()); -} - -inline static hipError_t hipMalloc(void** ptr, size_t size) { - return hipCUDAErrorTohipError(cudaMalloc(ptr, size)); -} - -inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { - return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height)); -} - -inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { - return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); -} - -inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } - -inline static hipError_t hipMallocHost(void** ptr, size_t size) - __attribute__((deprecated("use hipHostMalloc instead"))); -inline static hipError_t hipMallocHost(void** ptr, size_t size) { - return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); -} - -inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) - __attribute__((deprecated("use hipHostMalloc instead"))); -inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); -} - -inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) { - return 
hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); -} - -inline static hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc, - size_t width, size_t height, - unsigned int flags __dparm(hipArrayDefault)) { - return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags)); -} - -inline static hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, - struct hipExtent extent, unsigned int flags) { - return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags)); -} - -inline static hipError_t hipFreeArray(hipArray* array) { - return hipCUDAErrorTohipError(cudaFreeArray(array)); -} - -inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); -} - -inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { - return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); -} - -inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags)); -} - -inline static hipError_t hipHostUnregister(void* ptr) { - return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); -} - -inline static hipError_t hipFreeHost(void* ptr) - __attribute__((deprecated("use hipHostFree instead"))); -inline static hipError_t hipFreeHost(void* ptr) { - return hipCUDAErrorTohipError(cudaFreeHost(ptr)); -} - -inline static hipError_t hipHostFree(void* ptr) { - return hipCUDAErrorTohipError(cudaFreeHost(ptr)); -} - -inline static hipError_t hipSetDevice(int device) { - return hipCUDAErrorTohipError(cudaSetDevice(device)); -} - -inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) { - struct cudaDeviceProp cdprop; - memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp)); - cdprop.major = prop->major; - cdprop.minor = prop->minor; - 
cdprop.totalGlobalMem = prop->totalGlobalMem; - cdprop.sharedMemPerBlock = prop->sharedMemPerBlock; - cdprop.regsPerBlock = prop->regsPerBlock; - cdprop.warpSize = prop->warpSize; - cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock; - cdprop.clockRate = prop->clockRate; - cdprop.totalConstMem = prop->totalConstMem; - cdprop.multiProcessorCount = prop->multiProcessorCount; - cdprop.l2CacheSize = prop->l2CacheSize; - cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor; - cdprop.computeMode = prop->computeMode; - cdprop.canMapHostMemory = prop->canMapHostMemory; - cdprop.memoryClockRate = prop->memoryClockRate; - cdprop.memoryBusWidth = prop->memoryBusWidth; - return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop)); -} - -inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) { - return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size)); -} - -inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) { - return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size)); -} - -inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) { - return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size)); -} - -inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind copyKind) { - return hipCUDAErrorTohipError( - cudaMemcpy(dst, src, sizeBytes, 
hipMemcpyKindToCudaMemcpyKind(copyKind))); -} - - -inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError( - cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream)); -} - -inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, - size_t offset __dparm(0), - hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) { - return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, - hipMemcpyKindToCudaMemcpyKind(copyType))); -} - -inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, - size_t sizeBytes, size_t offset, - hipMemcpyKind copyType, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync( - symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream)); -} - -inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes, - size_t offset __dparm(0), - hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { - return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, - hipMemcpyKindToCudaMemcpyKind(kind))); -} - -inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, - size_t sizeBytes, size_t offset, - hipMemcpyKind kind, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync( - dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream)); -} - -inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { - return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName)); -} - -inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { - return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName)); -} - -inline static hipError_t hipMemcpy2D(void* dst, size_t 
dpitch, const void* src, size_t spitch, - size_t width, size_t height, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind))); -} - -inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) -{ - return hipCUDAErrorTohipError(cudaMemcpy3D(p)); -} - -inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, - size_t width, size_t height, hipMemcpyKind kind, - hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, - hipMemcpyKindToCudaMemcpyKind(kind), stream)); -} - -inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, - const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, - height, hipMemcpyKindToCudaMemcpyKind(kind))); -} - -inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, - const void* src, size_t count, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind))); -} - -inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, - size_t hOffset, size_t count, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count, - hipMemcpyKindToCudaMemcpyKind(kind))); -} - -inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, - size_t count) { - return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count)); -} - -inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, - size_t count) { - return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count)); -} - -inline 
static hipError_t hipDeviceSynchronize() { - return hipCUDAErrorTohipError(cudaDeviceSynchronize()); -} - -inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) { - return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig)); -} - -inline static const char* hipGetErrorString(hipError_t error) { - return cudaGetErrorString(hipErrorToCudaError(error)); -} - -inline static const char* hipGetErrorName(hipError_t error) { - return cudaGetErrorName(hipErrorToCudaError(error)); -} - -inline static hipError_t hipGetDeviceCount(int* count) { - return hipCUDAErrorTohipError(cudaGetDeviceCount(count)); -} - -inline static hipError_t hipGetDevice(int* device) { - return hipCUDAErrorTohipError(cudaGetDevice(device)); -} - -inline static hipError_t hipIpcCloseMemHandle(void* devPtr) { - return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr)); -} - -inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) { - return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event)); -} - -inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { - return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr)); -} - -inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { - return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle)); -} - -inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags)); -} - -inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { - return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); -} - -inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); -} - -inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, 
unsigned char value, size_t sizeBytes) { - return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); -} - -inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { - return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height)); -} - -inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream)); -} - -inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){ - return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent)); -} - -inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){ - return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)); -} - -inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) { - struct cudaDeviceProp cdprop; - cudaError_t cerror; - cerror = cudaGetDeviceProperties(&cdprop, device); - strncpy(p_prop->name, cdprop.name, 256); - p_prop->totalGlobalMem = cdprop.totalGlobalMem; - p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock; - p_prop->regsPerBlock = cdprop.regsPerBlock; - p_prop->warpSize = cdprop.warpSize; - for (int i = 0; i < 3; i++) { - p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i]; - p_prop->maxGridSize[i] = cdprop.maxGridSize[i]; - } - p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; - p_prop->clockRate = cdprop.clockRate; - p_prop->totalConstMem = cdprop.totalConstMem; - p_prop->major = cdprop.major; - p_prop->minor = cdprop.minor; - p_prop->multiProcessorCount = cdprop.multiProcessorCount; - p_prop->l2CacheSize = cdprop.l2CacheSize; - p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor; - p_prop->computeMode = cdprop.computeMode; - p_prop->canMapHostMemory = 
cdprop.canMapHostMemory; - p_prop->memoryClockRate = cdprop.memoryClockRate; - p_prop->memoryBusWidth = cdprop.memoryBusWidth; - - // Same as clock-rate: - p_prop->clockInstructionRate = cdprop.clockRate; - - int ccVers = p_prop->major * 100 + p_prop->minor * 10; - - p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110); - p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110); - p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120); - p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120); - - p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200); - - p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120); - p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110); - - p_prop->arch.hasDoubles = (ccVers >= 130); - - p_prop->arch.hasWarpVote = (ccVers >= 120); - p_prop->arch.hasWarpBallot = (ccVers >= 200); - p_prop->arch.hasWarpShuffle = (ccVers >= 300); - p_prop->arch.hasFunnelShift = (ccVers >= 350); - - p_prop->arch.hasThreadFenceSystem = (ccVers >= 200); - p_prop->arch.hasSyncThreadsExt = (ccVers >= 200); - - p_prop->arch.hasSurfaceFuncs = (ccVers >= 200); - p_prop->arch.has3dGrid = (ccVers >= 200); - p_prop->arch.hasDynamicParallelism = (ccVers >= 350); - - p_prop->concurrentKernels = cdprop.concurrentKernels; - p_prop->pciBusID = cdprop.pciBusID; - p_prop->pciDeviceID = cdprop.pciDeviceID; - p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor; - p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard; - - return hipCUDAErrorTohipError(cerror); -} - -inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - enum cudaDeviceAttr cdattr; - cudaError_t cerror; - - switch (attr) { - case hipDeviceAttributeMaxThreadsPerBlock: - cdattr = cudaDevAttrMaxThreadsPerBlock; - break; - case hipDeviceAttributeMaxBlockDimX: - cdattr = cudaDevAttrMaxBlockDimX; - break; - case hipDeviceAttributeMaxBlockDimY: - cdattr = cudaDevAttrMaxBlockDimY; - break; - case hipDeviceAttributeMaxBlockDimZ: - cdattr = cudaDevAttrMaxBlockDimZ; - 
break; - case hipDeviceAttributeMaxGridDimX: - cdattr = cudaDevAttrMaxGridDimX; - break; - case hipDeviceAttributeMaxGridDimY: - cdattr = cudaDevAttrMaxGridDimY; - break; - case hipDeviceAttributeMaxGridDimZ: - cdattr = cudaDevAttrMaxGridDimZ; - break; - case hipDeviceAttributeMaxSharedMemoryPerBlock: - cdattr = cudaDevAttrMaxSharedMemoryPerBlock; - break; - case hipDeviceAttributeTotalConstantMemory: - cdattr = cudaDevAttrTotalConstantMemory; - break; - case hipDeviceAttributeWarpSize: - cdattr = cudaDevAttrWarpSize; - break; - case hipDeviceAttributeMaxRegistersPerBlock: - cdattr = cudaDevAttrMaxRegistersPerBlock; - break; - case hipDeviceAttributeClockRate: - cdattr = cudaDevAttrClockRate; - break; - case hipDeviceAttributeMemoryClockRate: - cdattr = cudaDevAttrMemoryClockRate; - break; - case hipDeviceAttributeMemoryBusWidth: - cdattr = cudaDevAttrGlobalMemoryBusWidth; - break; - case hipDeviceAttributeMultiprocessorCount: - cdattr = cudaDevAttrMultiProcessorCount; - break; - case hipDeviceAttributeComputeMode: - cdattr = cudaDevAttrComputeMode; - break; - case hipDeviceAttributeL2CacheSize: - cdattr = cudaDevAttrL2CacheSize; - break; - case hipDeviceAttributeMaxThreadsPerMultiProcessor: - cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; - break; - case hipDeviceAttributeComputeCapabilityMajor: - cdattr = cudaDevAttrComputeCapabilityMajor; - break; - case hipDeviceAttributeComputeCapabilityMinor: - cdattr = cudaDevAttrComputeCapabilityMinor; - break; - case hipDeviceAttributeConcurrentKernels: - cdattr = cudaDevAttrConcurrentKernels; - break; - case hipDeviceAttributePciBusId: - cdattr = cudaDevAttrPciBusId; - break; - case hipDeviceAttributePciDeviceId: - cdattr = cudaDevAttrPciDeviceId; - break; - case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; - break; - case hipDeviceAttributeIsMultiGpuBoard: - cdattr = cudaDevAttrIsMultiGpuBoard; - break; - case hipDeviceAttributeIntegrated: - cdattr = 
cudaDevAttrIntegrated; - break; - default: - cerror = cudaErrorInvalidValue; - break; - } - - cerror = cudaDeviceGetAttribute(pi, cdattr, device); - - return hipCUDAErrorTohipError(cerror); -} - -inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - const void* func, - int blockSize, - size_t dynamicSMemSize) { - cudaError_t cerror; - cerror = - cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); - return hipCUDAErrorTohipError(cerror); -} - -inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, void* ptr) { - struct cudaPointerAttributes cPA; - hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); - if (err == hipSuccess) { - switch (cPA.memoryType) { - case cudaMemoryTypeDevice: - attributes->memoryType = hipMemoryTypeDevice; - break; - case cudaMemoryTypeHost: - attributes->memoryType = hipMemoryTypeHost; - break; - default: - return hipErrorUnknown; - } - attributes->device = cPA.device; - attributes->devicePointer = cPA.devicePointer; - attributes->hostPointer = cPA.hostPointer; - attributes->isManaged = 0; - attributes->allocationFlags = 0; - } - return err; -} - - -inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) { - return hipCUDAErrorTohipError(cudaMemGetInfo(free, total)); -} - -inline static hipError_t hipEventCreate(hipEvent_t* event) { - return hipCUDAErrorTohipError(cudaEventCreate(event)); -} - -inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) { - return hipCUDAErrorTohipError(cudaEventRecord(event, stream)); -} - -inline static hipError_t hipEventSynchronize(hipEvent_t event) { - return hipCUDAErrorTohipError(cudaEventSynchronize(event)); -} - -inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { - return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop)); -} - -inline static hipError_t hipEventDestroy(hipEvent_t 
event) { - return hipCUDAErrorTohipError(cudaEventDestroy(event)); -} - - -inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { - return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags)); -} - -inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { - return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority)); -} - -inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { - return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority)); -} - -inline static hipError_t hipStreamCreate(hipStream_t* stream) { - return hipCUDAErrorTohipError(cudaStreamCreate(stream)); -} - -inline static hipError_t hipStreamSynchronize(hipStream_t stream) { - return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); -} - -inline static hipError_t hipStreamDestroy(hipStream_t stream) { - return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); -} - -inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { - return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags)); -} - -inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) { - return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority)); -} - -inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags)); -} - -inline static hipError_t hipStreamQuery(hipStream_t stream) { - return hipCUDAErrorTohipError(cudaStreamQuery(stream)); -} - -inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, - void* userData, unsigned int flags) { - return hipCUDAErrorTohipError( - cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags)); -} - -inline static hipError_t 
hipDriverGetVersion(int* driverVersion) { - cudaError_t err = cudaDriverGetVersion(driverVersion); - - // Override driver version to match version reported on HCC side. - *driverVersion = 4; - - return hipCUDAErrorTohipError(err); -} - -inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) { - return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion)); -} - -inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) { - return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice)); -} - -inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) { - return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice)); -} - -inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags)); -} - -inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { - return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx)); -} - -inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { - return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags)); -} - -inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, - int* active) { - return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active)); -} - -inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev)); -} - -inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev)); -} - -inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev)); -} - -inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { - return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, 
flags)); -} - -inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, - hipDeviceptr_t dptr) { - return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr)); -} - -inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, - size_t count) { - return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)); -} - -inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, - int srcDevice, size_t count, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError( - cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)); -} - -// Profile APIs: -inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); } - -inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } - -inline static hipError_t hipSetDeviceFlags(unsigned int flags) { - return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags)); -} - -inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) { - return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags)); -} - -inline static hipError_t hipEventQuery(hipEvent_t event) { - return hipCUDAErrorTohipError(cudaEventQuery(event)); -} - -inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) { - return hipCUResultTohipError(cuCtxCreate(ctx, flags, device)); -} - -inline static hipError_t hipCtxDestroy(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxDestroy(ctx)); -} - -inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { - return hipCUResultTohipError(cuCtxPopCurrent(ctx)); -} - -inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxPushCurrent(ctx)); -} - -inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxSetCurrent(ctx)); -} - -inline static hipError_t 
hipCtxGetCurrent(hipCtx_t* ctx) { - return hipCUResultTohipError(cuCtxGetCurrent(ctx)); -} - -inline static hipError_t hipCtxGetDevice(hipDevice_t* device) { - return hipCUResultTohipError(cuCtxGetDevice(device)); -} - -inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { - return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion)); -} - -inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) { - return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig)); -} - -inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) { - return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig)); -} - -inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { - return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config)); -} - -inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { - return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig)); -} - -inline static hipError_t hipCtxSynchronize(void) { - return hipCUResultTohipError(cuCtxSynchronize()); -} - -inline static hipError_t hipCtxGetFlags(unsigned int* flags) { - return hipCUResultTohipError(cuCtxGetFlags(flags)); -} - -inline static hipError_t hipCtxDetach(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxDetach(ctx)); -} - -inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) { - return hipCUResultTohipError(cuDeviceGet(device, ordinal)); -} - -inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device)); -} - -inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceGetName(name, len, device)); -} - -inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) { - return 
hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device)); -} - -inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) { - return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId)); -} - -inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) { - return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config)); -} - -inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { - return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config)); -} - -inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { - return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit)); -} - -inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceTotalMem(bytes, device)); -} - -inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { - return hipCUResultTohipError(cuModuleLoad(module, fname)); -} - -inline static hipError_t hipModuleUnload(hipModule_t hmod) { - return hipCUResultTohipError(cuModuleUnload(hmod)); -} - -inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, - const char* kname) { - return hipCUResultTohipError(cuModuleGetFunction(function, module, kname)); -} - -inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { - return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func)); -} - -inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name) { - return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name)); -} - -inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { - return hipCUResultTohipError(cuModuleLoadData(module, image)); -} - -inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, - unsigned int numOptions, hipJitOption* options, - 
void** optionValues) { - return hipCUResultTohipError( - cuModuleLoadDataEx(module, image, numOptions, options, optionValues)); -} - -inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, - hipStream_t stream, void** kernelParams, - void** extra) { - return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, - blockDimY, blockDimZ, sharedMemBytes, stream, - kernelParams, extra)); -} - - -inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { - return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig)); -} - -#ifdef __cplusplus -} -#endif - -#ifdef __CUDACC__ - -template -inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, - size_t dynamicSMemSize = 0, - int blockSizeLimit = 0, - unsigned int flags = 0) { - cudaError_t cerror; - cerror = cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, dynamicSMemSize, - blockSizeLimit, flags); - return hipCUDAErrorTohipError(cerror); -} - -template -inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, - const void* devPtr, size_t size = UINT_MAX) { - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size)); -} - -template -inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, - const void* devPtr, const struct hipChannelFormatDesc& desc, - size_t size = UINT_MAX) { - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); -} - -template -inline static hipError_t hipUnbindTexture(struct texture* tex) { - return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); -} - -inline static hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t size = 
UINT_MAX){ - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); -} - -template -inline static hipError_t hipBindTextureToArray(struct texture& tex, - hipArray_const_t array, - const struct hipChannelFormatDesc& desc) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); -} - -template -inline static hipError_t hipBindTextureToArray(struct texture *tex, - hipArray_const_t array, - const struct hipChannelFormatDesc* desc) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); -} - -template -inline static hipError_t hipBindTextureToArray(struct texture& tex, - hipArray_const_t array) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); -} - -template -inline static hipChannelFormatDesc hipCreateChannelDesc() { - return cudaCreateChannelDesc(); -} - -inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, - hipChannelFormatKind f) { - return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f)); -} - -inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, - const hipResourceDesc* pResDesc, - const hipTextureDesc* pTexDesc, - const hipResourceViewDesc* pResViewDesc) { - return hipCUDAErrorTohipError( - cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); -} - -inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { - return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject)); -} - -inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, - const hipResourceDesc* pResDesc) { - return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc)); -} - -inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { - return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject)); -} - -inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, - 
hipTextureObject_t textureObject) { - return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); -} - -inline static hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref) -{ - return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); -} - -inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) -{ - return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); -} -#endif //__CUDACC__ - -#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H diff --git a/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h b/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h deleted file mode 100644 index 751dd8e4d..000000000 --- a/src/utils/amd_hip/hip/nvcc_detail/hip_texture_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H - -#include - -#endif diff --git a/src/utils/amd_hip/hip/texture_types.h b/src/utils/amd_hip/hip/texture_types.h deleted file mode 100644 index 7d785708d..000000000 --- a/src/utils/amd_hip/hip/texture_types.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H - -#include - -#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) -#include -#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__) -#include "texture_types.h" -#else -#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); -#endif - -#endif From 5e809c7e00ef57500dc184fa11715f9ad863ffb4 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 14 Mar 2019 16:48:01 +0100 Subject: [PATCH 21/28] added explicit output directory for library --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 24fe5fcc5..b927adef8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,7 +38,7 @@ if(USE_ROCM) file(GLOB_RECURSE HIPFILES_KERNELS "Kernels/*.cu") file(GLOB_RECURSE HIPFILES_SDDK "SDDK/*.cu") rocm_hip_add_library(sirius_rocm SHARED ${HIPFILES_SDDK} ${HIPFILES_KERNELS} - FLAGS ${DEFINITIONS_GENERATOR} ${INCLUDE_DIR_GENERATOR} "-Wno-macro-redefined") + FLAGS ${DEFINITIONS_GENERATOR} ${INCLUDE_DIR_GENERATOR} "-Wno-macro-redefined" OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) endif() install(DIRECTORY ./ DESTINATION "${CMAKE_INSTALL_PREFIX}/include/sirius" FILES_MATCHING REGEX ".*(hpp|h)$") From d2708893962706db91cc21b1357e3f3490699db3 Mon Sep 17 00:00:00 2001 From: Simon Frasch Date: Thu, 14 Mar 2019 16:58:26 +0100 Subject: [PATCH 22/28] fixed duplicate declaration --- src/SDDK/GPU/acc.hpp | 
1 - 1 file changed, 1 deletion(-) diff --git a/src/SDDK/GPU/acc.hpp b/src/SDDK/GPU/acc.hpp index faa43cdc2..e89b2db9e 100644 --- a/src/SDDK/GPU/acc.hpp +++ b/src/SDDK/GPU/acc.hpp @@ -83,7 +83,6 @@ using acc_complex_double_t = cuDoubleComplex; #define ACC_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; #elif defined(__ROCM) -using acc_error_t = hipError_t; using acc_complex_float_t = hipFloatComplex; using acc_complex_double_t = hipDoubleComplex; #define make_accDoubleComplex make_hipDoubleComplex From 75cfb4e93f40944f8302d8106225fa7a6afb9d36 Mon Sep 17 00:00:00 2001 From: toxa81 Date: Mon, 18 Mar 2019 10:53:05 +0100 Subject: [PATCH 23/28] update reference files with stress tensor and forces --- verification/test1/output_ref.json | 1117 ++++++------- verification/test10/output_ref.json | 947 ++++++----- verification/test10/sirius.json | 4 +- verification/test11/output_ref.json | 1133 ++++++++------ verification/test11/sirius.json | 4 +- verification/test14/output_ref.json | 1244 ++++++++------- .../test15/F.pz-n-kjpaw_psl.0.1.UPF.json | 277 ++++ .../test15/Li.pz-s-kjpaw_psl.0.2.1.UPF.json | 274 ++++ verification/test15/sirius.json | 77 + verification/test2/sirius.json | 5 +- verification/test3/output_ref.json | 1102 ++++++------- verification/test4/output_ref.json | 1261 ++++++++------- verification/test4/sirius.json | 4 +- verification/test5/output_ref.json | 1202 ++++++++------ verification/test5/sirius.json | 4 +- verification/test6/output_ref.json | 1208 +++++++------- verification/test6/sirius.json | 4 +- verification/test7/output_ref.json | 1289 ++++++++------- verification/test7/sirius.json | 4 +- verification/test8/output_ref.json | 1244 +++++++++------ verification/test8/sirius.json | 4 +- verification/test9/output_ref.json | 1387 +++++++++-------- verification/test9/sirius.json | 4 +- 23 files changed, 7892 insertions(+), 5907 deletions(-) create mode 100644 verification/test15/F.pz-n-kjpaw_psl.0.1.UPF.json create mode 100644 
verification/test15/Li.pz-s-kjpaw_psl.0.2.1.UPF.json create mode 100644 verification/test15/sirius.json diff --git a/verification/test1/output_ref.json b/verification/test1/output_ref.json index 87186b4fc..371db93ba 100644 --- a/verification/test1/output_ref.json +++ b/verification/test1/output_ref.json @@ -4,1019 +4,1024 @@ "band_evp_work_count": 1268.7318281250011, "local_operator_num_applied": 4705 }, - "git_hash": "2e3b15c5ba596aa7175f83b0cfdbc56168d6e822", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "SrVO3", "converged": true, "core_leakage": 0.0, - "efermi": 0.41397746414547004, + "efermi": 0.41397746414238856, "energy": { "bxc": 0.0, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -8.422759595325976, - "ewald": -114.2230327929388, - "exc": -29.649816051328813, - "kin": 62.307064377082234, - "total": -156.37672838114713, - "veff": -70.72982397240823, - "vha": 70.66193120951397, - "vxc": -31.249845663203445 + "eval_sum": -8.422759595370055, + "ewald": -114.22303279293868, + "exc": -29.649816051313678, + "kin": 62.30706437705531, + "total": -156.3767283811499, + "veff": -70.72982397242536, + "vha": 70.66193120942259, + "vxc": -31.249845663183812 }, "fft_coarse_grid": [30,30,30], "fft_grid": [50,50,50], + "forces": [ + [7.623017079836013e-16,6.764406707984662e-16,7.679572962630445e-16], + [-1.3148966962853815e-15,-1.5054957729717361e-15,-1.5890862274539623e-15], + [4.10392152702646e-15,-2.5165770812821727e-16,-2.3742965538811134e-16], + [-2.043976407549946e-16,4.007114833945515e-15,-3.1531998267037307e-16], + [-2.189918557137147e-16,-2.895899792132911e-16,4.013620074327545e-15] + ], "mpi_grid": [1,1], "num_atoms": 5, "num_bands": 40, "num_fv_states": 40, "num_scf_iterations": 14, "omega": 382.708923702537, - "pw_cutoff": 20.0 + "pw_cutoff": 20.0, + "stress": [ + [0.0003510490640174953,-1.9983403963978884e-36,5.38529043878561e-28], + 
[-1.9983403963978884e-36,0.00035104906401743977,6.731613116190485e-28], + [5.38529043878561e-28,6.731613116190485e-28,0.0003510490640174727] + ] }, "task": 0, "threads_per_rank": 8, "timers": { "Eigensolver_lapack|zheevr": { - "avg": 0.0018181826086956525, + "avg": 0.0018678173913043478, "count": 115, - "max": 0.003252, - "min": 0.000692, - "total": 0.20909100000000003 + "max": 0.00382, + "min": 0.000697, + "total": 0.214799 }, "Eigensolver_lapack|zhegvx": { - "avg": 0.0009865468750000002, + "avg": 0.0014762500000000006, "count": 64, - "max": 0.001306, - "min": 0.000785, - "total": 0.06313900000000001 + "max": 0.031927, + "min": 0.000814, + "total": 0.09448000000000004 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0057155, + "avg": 0.0211795, "count": 2, - "max": 0.007102, - "min": 0.004329, - "total": 0.011431 + "max": 0.038757, + "min": 0.003602, + "total": 0.042359 }, "sddk::FFT3D::prepare": { - "avg": 6.435256410256408e-05, + "avg": 6.830769230769232e-05, "count": 156, - "max": 0.000107, - "min": 4.3e-05, - "total": 0.010038999999999998 + "max": 0.000147, + "min": 4.8e-05, + "total": 0.010656000000000002 }, "sddk::FFT3D::prepare|cpu": { - "avg": 5.8942307692307735e-05, + "avg": 6.296153846153848e-05, "count": 156, - "max": 0.000103, - "min": 4e-05, - "total": 0.009195000000000007 + "max": 0.000142, + "min": 4.3e-05, + "total": 0.009822000000000003 }, "sddk::FFT3D::transform": { - "avg": 0.0009547758149699586, + "avg": 0.0003029742305590963, "count": 10982, - "max": 0.006176, - "min": 0.000568, - "total": 10.485348000000085 + "max": 0.002886, + "min": 0.000222, + "total": 3.3272629999999954 }, "sddk::FFT3D::transform_xy": { - "avg": 0.00019435585503551302, + "avg": 0.00020316372245492717, "count": 10982, - "max": 0.001413, + "max": 0.001099, "min": 0.000145, - "total": 2.134416000000004 + "total": 2.2311440000000102 }, "sddk::FFT3D::transform_z": { - "avg": 0.0007531934984520112, + "avg": 9.263731560735868e-05, "count": 10982, - "max": 0.005804, - "min": 0.000267, - 
"total": 8.271570999999987 + "max": 0.001775, + "min": 4.7e-05, + "total": 1.017343000000013 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0007496978692405729, + "avg": 8.951274813330984e-05, "count": 10982, - "max": 0.005801, - "min": 0.000256, - "total": 8.233181999999971 + "max": 0.001771, + "min": 4.4e-05, + "total": 0.9830290000000086 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0007459017483154249, + "avg": 8.607576033509414e-05, "count": 10982, - "max": 0.005796, - "min": 0.000252, - "total": 8.191492999999996 + "max": 0.001763, + "min": 4e-05, + "total": 0.9452840000000038 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.00061825, + "avg": 0.0006380000000000001, "count": 12, - "max": 0.002138, - "min": 0.000254, - "total": 0.007419 + "max": 0.002289, + "min": 0.000189, + "total": 0.007656000000000001 }, "sddk::Gvec::init": { - "avg": 0.0027040000000000002, + "avg": 0.001982, "count": 6, - "max": 0.011143, - "min": 0.000644, - "total": 0.016224000000000002 + "max": 0.007909, + "min": 0.000443, + "total": 0.011892 }, "sddk::inner": { - "avg": 0.0003027167019027488, + "avg": 0.0003155496828752644, "count": 473, - "max": 0.001518, + "max": 0.001, "min": 6e-06, - "total": 0.14318500000000017 + "total": 0.14925500000000005 }, "sddk::inner|local": { - "avg": 0.00029910359408033814, + "avg": 0.0003115750528541227, "count": 473, - "max": 0.00151, - "min": 4e-06, - "total": 0.14147599999999994 + "max": 0.000995, + "min": 5e-06, + "total": 0.14737500000000003 }, "sddk::matrix_storage::matrix_storage": { - "avg": 1.1763157894736912e-06, + "avg": 5.310000000000007e-05, "count": 380, - "max": 2.4e-05, + "max": 0.000334, "min": 0.0, - "total": 0.0004470000000000026 + "total": 0.020178000000000026 }, "sddk::matrix_storage::remap_backward": { - "avg": 1.474860335195528e-06, + "avg": 1.0837988826815622e-06, "count": 179, - "max": 1.3e-05, + "max": 9e-06, "min": 0.0, - "total": 0.00026399999999999953 + "total": 0.00019399999999999962 }, 
"sddk::matrix_storage::remap_forward": { - "avg": 5.2887029288703e-06, + "avg": 5.213389121338923e-06, "count": 239, - "max": 9e-06, - "min": 3e-06, - "total": 0.0012640000000000019 + "max": 2.3e-05, + "min": 4e-06, + "total": 0.0012460000000000025 }, "sddk::matrix_storage::set_num_extra": { - "avg": 1.5861244019138823e-06, + "avg": 1.279904306220102e-06, "count": 418, - "max": 5e-06, + "max": 1.6e-05, "min": 0.0, - "total": 0.0006630000000000028 + "total": 0.0005350000000000026 }, "sddk::orthogonalize": { - "avg": 0.001119565217391304, + "avg": 0.0012197565217391302, "count": 115, - "max": 0.003685, - "min": 0.000184, - "total": 0.12874999999999998 + "max": 0.007151, + "min": 0.000178, + "total": 0.14027199999999998 }, "sddk::orthogonalize|tmtrx": { - "avg": 3.5591304347826074e-05, + "avg": 6.569565217391303e-05, "count": 115, - "max": 0.000138, + "max": 0.003182, "min": 2e-06, - "total": 0.0040929999999999985 + "total": 0.007554999999999998 }, "sddk::orthogonalize|transform": { - "avg": 0.00014457391304347823, + "avg": 0.00016772173913043485, "count": 115, - "max": 0.00098, + "max": 0.002314, "min": 1e-05, - "total": 0.016625999999999995 + "total": 0.019288000000000007 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.010679, + "avg": 0.006026, "count": 1, - "max": 0.010679, - "min": 0.010679, - "total": 0.010679 + "max": 0.006026, + "min": 0.006026, + "total": 0.006026 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.002863433333333332, + "avg": 0.0004914999999999999, "count": 30, - "max": 0.003383, - "min": 0.002689, - "total": 0.08590299999999997 + "max": 0.00073, + "min": 0.000435, + "total": 0.014744999999999996 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.002918333333333333, + "avg": 0.0005368333333333334, "count": 30, - "max": 0.003268, - "min": 0.002743, - "total": 0.08754999999999999 + "max": 0.000858, + "min": 0.000458, + "total": 0.016105 }, "sddk::transform": { - "avg": 0.00039192358803986726, + "avg": 
0.0004134318936877077, "count": 301, - "max": 0.001414, - "min": 8.7e-05, - "total": 0.11796900000000005 + "max": 0.001402, + "min": 8.4e-05, + "total": 0.12444300000000001 }, "sddk::transform|init": { - "avg": 6.127574750830566e-05, + "avg": 5.7465116279069835e-05, "count": 301, - "max": 0.000917, + "max": 0.000562, "min": 2e-06, - "total": 0.018444000000000006 + "total": 0.01729700000000002 }, "sddk::transform|local": { - "avg": 0.00014895252679938753, + "avg": 0.00016001378254211328, "count": 653, - "max": 0.00067, - "min": 2.1e-05, - "total": 0.09726600000000005 + "max": 0.000731, + "min": 2e-05, + "total": 0.10448899999999997 }, "sirius::Atom_type::init": { - "avg": 0.019862666666666664, + "avg": 0.018859333333333336, "count": 3, - "max": 0.024945, - "min": 0.012885, - "total": 0.059587999999999995 + "max": 0.023077, + "min": 0.013309, + "total": 0.056578 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.102514, + "avg": 0.10362266666666668, "count": 3, - "max": 0.11544, - "min": 0.079913, - "total": 0.307542 + "max": 0.114482, + "min": 0.082635, + "total": 0.31086800000000003 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { - "avg": 0.08807011111111111, + "avg": 0.08938377777777777, "count": 9, - "max": 0.130552, - "min": 0.046504, - "total": 0.792631 + "max": 0.120611, + "min": 0.054755, + "total": 0.8044539999999999 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { - "avg": 0.08754744444444446, + "avg": 0.07840455555555555, "count": 9, - "max": 0.130518, - "min": 0.04649, - "total": 0.787927 + "max": 0.103534, + "min": 0.046678, + "total": 0.705641 }, "sirius::Augmentation_operator_gvec_deriv|constructor": { - "avg": 0.079369, + "avg": 0.06925, "count": 1, - "max": 0.079369, - "min": 0.079369, - "total": 0.079369 + "max": 0.06925, + "min": 0.06925, + "total": 0.06925 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.16839023333333333, + "avg": 0.0745367833333333, "count": 60, - 
"max": 0.279066, - "min": 0.073777, - "total": 10.103414 + "max": 0.12676, + "min": 0.035575, + "total": 4.472206999999998 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 2.5649999999999993e-05, + "avg": 0.0003547666666666666, "count": 60, - "max": 6e-05, - "min": 1.9e-05, - "total": 0.0015389999999999996 + "max": 0.000688, + "min": 7.1e-05, + "total": 0.021285999999999996 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.00153452, + "avg": 0.0015718628571428565, "count": 175, - "max": 0.003256, - "min": 0.000695, - "total": 0.268541 + "max": 0.003826, + "min": 0.0007, + "total": 0.2750759999999999 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.16347565000000003, + "avg": 0.06975095, "count": 60, - "max": 0.274857, - "min": 0.069738, - "total": 9.808539000000001 + "max": 0.1226, + "min": 0.031188, + "total": 4.1850570000000005 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.0003859666666666667, + "avg": 0.0003937166666666666, "count": 60, - "max": 0.000713, - "min": 0.000168, - "total": 0.023158 + "max": 0.000779, + "min": 0.000172, + "total": 0.023622999999999995 }, "sirius::Band::initialize_subspace": { - "avg": 1.327355, + "avg": 0.175668, "count": 1, - "max": 1.327355, - "min": 1.327355, - "total": 1.327355 + "max": 0.175668, + "min": 0.175668, + "total": 0.175668 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.33119175, + "avg": 0.0437045, "count": 4, - "max": 0.339523, - "min": 0.310803, - "total": 1.324767 + "max": 0.074141, + "min": 0.032278, + "total": 0.174818 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.26093025, + "avg": 0.0010305, "count": 4, - "max": 0.269029, - "min": 0.240725, - "total": 1.043721 + "max": 0.001136, + "min": 0.000986, + "total": 0.004122 }, "sirius::Band::residuals": { - "avg": 0.0004992000000000003, + "avg": 0.0005537028571428573, "count": 175, - "max": 0.001952, - "min": 0.0, - "total": 0.08736000000000005 + "max": 0.001569, 
+ "min": 1e-06, + "total": 0.09689800000000004 }, "sirius::Band::residuals_aux": { - "avg": 0.0003863360655737707, + "avg": 0.0004395491803278689, "count": 122, - "max": 0.000713, - "min": 0.000282, - "total": 0.04713300000000002 + "max": 0.001137, + "min": 0.000292, + "total": 0.053625000000000006 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.0005003333333333334, + "avg": 0.0005140411522633747, "count": 243, - "max": 0.001735, - "min": 0.000219, - "total": 0.12158100000000002 + "max": 0.001213, + "min": 0.000233, + "total": 0.12491200000000004 }, "sirius::Band::solve": { - "avg": 0.6758052666666665, + "avg": 0.2992296, "count": 15, - "max": 1.064881, - "min": 0.346193, - "total": 10.137078999999998 + "max": 0.483783, + "min": 0.150542, + "total": 4.488443999999999 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.002405, + "avg": 0.0024052500000000003, "count": 4, - "max": 0.002519, + "max": 0.002536, "min": 0.002288, - "total": 0.00962 + "total": 0.009621000000000001 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.001169, + "avg": 0.00112425, "count": 4, - "max": 0.001307, - "min": 0.001012, - "total": 0.004676 + "max": 0.001182, + "min": 0.001067, + "total": 0.004497 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 3.714285714285711e-07, + "avg": 3.6428571428571396e-07, "count": 140, "max": 1e-06, "min": 0.0, - "total": 5.199999999999995e-05 + "total": 5.099999999999995e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.0005838076923076923, + "avg": 0.0006006923076923076, "count": 52, - "max": 0.00132, - "min": 0.000481, - "total": 0.030358 + "max": 0.001443, + "min": 0.000461, + "total": 0.031235999999999996 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.0004449254237288137, + "avg": 0.0004678406779661015, "count": 295, - "max": 0.000964, - "min": 8.5e-05, - "total": 0.13125300000000004 + "max": 0.004464, + "min": 8.3e-05, + "total": 0.13801299999999994 }, "sirius::Beta_projectors_base::local_inner_aux": { 
- "avg": 0.0004405220338983051, + "avg": 0.00046320677966101654, "count": 295, - "max": 0.000958, - "min": 8.2e-05, - "total": 0.12995400000000001 + "max": 0.004456, + "min": 7.9e-05, + "total": 0.13664599999999988 }, "sirius::Beta_projectors_base::prepare": { - "avg": 1.875e-06, + "avg": 1.6875e-05, "count": 8, - "max": 3e-06, + "max": 0.000109, "min": 1e-06, - "total": 1.5e-05 + "total": 0.000135 }, "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { - "avg": 0.0053775, + "avg": 0.00510275, "count": 4, - "max": 0.007449, - "min": 0.004526, - "total": 0.02151 + "max": 0.005382, + "min": 0.004833, + "total": 0.020411 }, "sirius::Broyden1::mix": { - "avg": 0.0016346666666666666, + "avg": 0.0018772666666666666, "count": 15, - "max": 0.003124, - "min": 7.7e-05, - "total": 0.02452 + "max": 0.003077, + "min": 7.1e-05, + "total": 0.028159 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.102658, - "count": 1, - "max": 0.102658, - "min": 0.102658, - "total": 0.102658 - }, - "sirius::DFT_ground_state::forces": { - "avg": 0.00018, + "avg": 0.000555, "count": 1, - "max": 0.00018, - "min": 0.00018, - "total": 0.00018 + "max": 0.000555, + "min": 0.000555, + "total": 0.000555 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 25.534024, + "avg": 6.991784, "count": 1, - "max": 25.534024, - "min": 25.534024, - "total": 25.534024 + "max": 6.991784, + "min": 6.991784, + "total": 6.991784 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 1.7018366, + "avg": 0.4617274666666667, "count": 15, - "max": 2.078889, - "min": 1.373096, - "total": 25.527549 + "max": 0.64627, + "min": 0.308045, + "total": 6.925912 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0006076833333333331, + "avg": 0.0006349999999999998, "count": 60, - "max": 0.000872, - "min": 0.000505, - "total": 0.036460999999999986 + "max": 0.002119, + "min": 0.000519, + "total": 0.03809999999999999 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.02487768333333332, + 
"avg": 0.009068416666666664, "count": 60, - "max": 0.028735, - "min": 0.022601, - "total": 1.4926609999999991 + "max": 0.010933, + "min": 0.007558, + "total": 0.5441049999999998 }, "sirius::Density::augment": { - "avg": 0.39799106666666667, + "avg": 0.07862346666666667, "count": 15, - "max": 0.465113, - "min": 0.381322, - "total": 5.969866 + "max": 0.104834, + "min": 0.07294, + "total": 1.179352 }, "sirius::Density::generate": { - "avg": 0.5022124666666666, + "avg": 0.11801620000000002, "count": 15, - "max": 0.565749, - "min": 0.481721, - "total": 7.533187 + "max": 0.143541, + "min": 0.111209, + "total": 1.7702430000000002 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.108669, + "avg": 0.001535, "count": 1, - "max": 0.108669, - "min": 0.108669, - "total": 0.108669 + "max": 0.001535, + "min": 0.001535, + "total": 0.001535 }, "sirius::Density::generate_rho_aug": { - "avg": 0.3978982, + "avg": 0.07853473333333334, "count": 15, - "max": 0.464949, - "min": 0.381199, - "total": 5.9684729999999995 + "max": 0.104697, + "min": 0.072861, + "total": 1.1780210000000002 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.007760111111111112, + "avg": 0.011881733333333335, "count": 45, - "max": 0.045652, - "min": 0.002519, - "total": 0.34920500000000004 + "max": 0.048218, + "min": 0.005019, + "total": 0.5346780000000001 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.004726444444444445, + "avg": 0.004354355555555555, "count": 45, - "max": 0.012063, - "min": 0.002319, - "total": 0.21269 + "max": 0.007171, + "min": 0.002109, + "total": 0.195946 }, "sirius::Density::generate_valence": { - "avg": 0.5022075333333332, + "avg": 0.11801139999999997, "count": 15, - "max": 0.565741, - "min": 0.481716, - "total": 7.533112999999999 + "max": 0.143531, + "min": 0.111206, + "total": 1.7701709999999997 }, "sirius::Density::initial_density": { - "avg": 0.11192, + "avg": 0.00245, "count": 1, - "max": 0.11192, - "min": 0.11192, - "total": 0.11192 + "max": 
0.00245, + "min": 0.00245, + "total": 0.00245 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.0031741999999999994, + "avg": 0.003410533333333333, "count": 15, - "max": 0.004361, - "min": 0.002947, - "total": 0.04761299999999999 + "max": 0.004195, + "min": 0.003154, + "total": 0.051157999999999995 }, "sirius::Density::update": { - "avg": 0.108698, + "avg": 0.001561, "count": 1, - "max": 0.108698, - "min": 0.108698, - "total": 0.108698 + "max": 0.001561, + "min": 0.001561, + "total": 0.001561 }, "sirius::Field4D::symmetrize": { - "avg": 0.0113101, + "avg": 0.006703933333333333, "count": 30, - "max": 0.013979, - "min": 0.010744, - "total": 0.339303 + "max": 0.009233, + "min": 0.006018, + "total": 0.201118 }, "sirius::Force::calc_forces_core": { - "avg": 0.008043, + "avg": 0.003865, "count": 1, - "max": 0.008043, - "min": 0.008043, - "total": 0.008043 + "max": 0.003865, + "min": 0.003865, + "total": 0.003865 }, "sirius::Force::calc_forces_ewald": { - "avg": 1.158306, + "avg": 0.003008, "count": 1, - "max": 1.158306, - "min": 1.158306, - "total": 1.158306 + "max": 0.003008, + "min": 0.003008, + "total": 0.003008 }, "sirius::Force::calc_forces_nonloc": { - "avg": 0.017538, + "avg": 0.017591, "count": 1, - "max": 0.017538, - "min": 0.017538, - "total": 0.017538 + "max": 0.017591, + "min": 0.017591, + "total": 0.017591 }, "sirius::Force::calc_forces_scf_corr": { - "avg": 0.002007, + "avg": 0.002209, "count": 1, - "max": 0.002007, - "min": 0.002007, - "total": 0.002007 + "max": 0.002209, + "min": 0.002209, + "total": 0.002209 }, "sirius::Force::calc_forces_us": { - "avg": 1.178096, + "avg": 0.053991, "count": 1, - "max": 1.178096, - "min": 1.178096, - "total": 1.178096 + "max": 0.053991, + "min": 0.053991, + "total": 0.053991 }, "sirius::Force::calc_forces_vloc": { - "avg": 0.0031580000000000002, + "avg": 0.002814, "count": 1, - "max": 0.0031580000000000002, - "min": 0.0031580000000000002, - "total": 0.0031580000000000002 + "max": 0.002814, + "min": 0.002814, 
+ "total": 0.002814 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.0527160502793296, + "avg": 0.02031057541899442, "count": 179, - "max": 0.08811, - "min": 0.002199, - "total": 9.436172999999998 + "max": 0.039277, + "min": 0.000957, + "total": 3.635593000000001 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.0024416666666666666, + "avg": 0.0018961833333333337, "count": 60, - "max": 0.003292, - "min": 0.001973, - "total": 0.1465 + "max": 0.002768, + "min": 0.00149, + "total": 0.11377100000000002 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.0023734, + "avg": 0.001891283333333334, "count": 60, - "max": 0.00327, - "min": 0.001958, - "total": 0.142404 + "max": 0.002954, + "min": 0.001449, + "total": 0.11347700000000004 }, "sirius::K_point::K_point": { - "avg": 1e-06, + "avg": 1.25e-06, "count": 4, - "max": 2e-06, + "max": 5e-06, "min": 0.0, - "total": 4e-06 + "total": 5e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.00081675, + "avg": 0.0005887500000000001, "count": 4, - "max": 0.000851, - "min": 0.00079, - "total": 0.003267 + "max": 0.000678, + "min": 0.000461, + "total": 0.0023550000000000003 }, "sirius::K_point::initialize": { - "avg": 0.00384975, + "avg": 0.0033545, "count": 4, - "max": 0.003956, - "min": 0.003693, - "total": 0.015399 + "max": 0.003436, + "min": 0.003313, + "total": 0.013418 }, "sirius::K_point::update": { - "avg": 0.0028717499999999997, + "avg": 0.0027175, "count": 4, - "max": 0.002964, - "min": 0.002731, - "total": 0.011486999999999999 + "max": 0.002829, + "min": 0.002635, + "total": 0.01087 }, "sirius::K_point_set::add_kpoint": { - "avg": 5.5e-06, + "avg": 5.500000000000001e-06, "count": 4, - "max": 1.4e-05, + "max": 1.5e-05, "min": 2e-06, - "total": 2.2e-05 + "total": 2.2000000000000003e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.020799, + "avg": 0.023704, "count": 1, - "max": 0.020799, - "min": 0.020799, - "total": 0.020799 + "max": 0.023704, + "min": 0.023704, + "total": 0.023704 }, 
"sirius::K_point_set::find_band_occupancies": { - "avg": 0.0003612666666666667, + "avg": 0.00041066666666666666, "count": 15, - "max": 0.0005, - "min": 0.000306, - "total": 0.005419 + "max": 0.00052, + "min": 0.000333, + "total": 0.00616 }, "sirius::K_point_set::initialize": { - "avg": 0.015842, + "avg": 0.01348, "count": 1, - "max": 0.015842, - "min": 0.015842, - "total": 0.015842 + "max": 0.01348, + "min": 0.01348, + "total": 0.01348 }, "sirius::K_point_set::sync_band_energies": { - "avg": 1.1533333333333332e-05, + "avg": 6.666666666666666e-06, "count": 15, - "max": 1.6e-05, - "min": 9e-06, - "total": 0.00017299999999999998 + "max": 1.2e-05, + "min": 5e-06, + "total": 9.999999999999999e-05 }, "sirius::Local_operator::apply_h": { - "avg": 0.051635027932960896, + "avg": 0.019144497206703914, "count": 179, - "max": 0.086334, - "min": 0.001889, - "total": 9.24267 + "max": 0.033835, + "min": 0.00067, + "total": 3.4268650000000007 }, "sirius::Local_operator::prepare": { - "avg": 0.0003649625000000002, + "avg": 0.0001335, "count": 80, - "max": 0.002495, - "min": 3.3e-05, - "total": 0.029197000000000015 + "max": 0.000761, + "min": 3.6e-05, + "total": 0.010679999999999999 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 1.40625e-06, + "avg": 1.28125e-06, "count": 32, - "max": 6e-06, + "max": 2e-06, "min": 0.0, - "total": 4.5e-05 + "total": 4.1e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.0002723296089385477, + "avg": 0.0002981955307262573, "count": 358, - "max": 0.000738, - "min": 8.8e-05, - "total": 0.0974940000000001 + "max": 0.004829, + "min": 8.9e-05, + "total": 0.1067540000000001 }, "sirius::Periodic_function::add": { - "avg": 8.553124999999998e-05, + "avg": 8.956250000000003e-05, "count": 32, - "max": 0.000131, - "min": 5.4e-05, - "total": 0.0027369999999999994 + "max": 0.000261, + "min": 5.8e-05, + "total": 0.002866000000000001 }, "sirius::Periodic_function::inner": { - "avg": 8.822480620155037e-05, + "avg": 9.006976744186042e-05, 
"count": 129, - "max": 0.000166, - "min": 5.9e-05, - "total": 0.011380999999999997 + "max": 0.000165, + "min": 6.4e-05, + "total": 0.011618999999999994 }, "sirius::Periodic_function::integrate": { - "avg": 7.125e-05, + "avg": 7.60625e-05, "count": 16, - "max": 0.000133, - "min": 5.6e-05, - "total": 0.00114 + "max": 8.9e-05, + "min": 6.8e-05, + "total": 0.001217 }, "sirius::Potential::Potential": { - "avg": 0.115901, + "avg": 0.010544, "count": 1, - "max": 0.115901, - "min": 0.115901, - "total": 0.115901 + "max": 0.010544, + "min": 0.010544, + "total": 0.010544 }, "sirius::Potential::generate": { - "avg": 0.4828375624999999, + "avg": 0.022979625000000004, "count": 16, - "max": 0.499222, - "min": 0.463468, - "total": 7.725400999999998 + "max": 0.030275, + "min": 0.020665, + "total": 0.36767400000000006 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.34407325, + "avg": 0.0156644375, "count": 16, - "max": 0.35629, - "min": 0.336126, - "total": 5.505172 + "max": 0.019807, + "min": 0.014117, + "total": 0.250631 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 3.1249999999999997e-07, + "avg": 5e-07, "count": 16, "max": 1e-06, "min": 0.0, - "total": 4.9999999999999996e-06 + "total": 8e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.11044, + "avg": 0.004075, "count": 1, - "max": 0.11044, - "min": 0.11044, - "total": 0.11044 + "max": 0.004075, + "min": 0.004075, + "total": 0.004075 }, "sirius::Potential::poisson": { - "avg": 0.12600962500000001, + "avg": 0.000937625, "count": 16, - "max": 0.132948, - "min": 0.107161, - "total": 2.0161540000000002 + "max": 0.001197, + "min": 0.000826, + "total": 0.015002 }, "sirius::Potential::update": { - "avg": 0.110467, + "avg": 0.004108, "count": 1, - "max": 0.110467, - "min": 0.110467, - "total": 0.110467 + "max": 0.004108, + "min": 0.004108, + "total": 0.004108 }, "sirius::Potential::xc": { - "avg": 0.008203375, + "avg": 0.005247187499999999, "count": 16, - "max": 0.010487, - 
"min": 0.00742, - "total": 0.131254 + "max": 0.00836, + "min": 0.004077, + "total": 0.08395499999999999 }, "sirius::Potential::xc_rg_nonmagnetic": { - "avg": 0.00819925, + "avg": 0.005242125, "count": 16, - "max": 0.01048, - "min": 0.007416, - "total": 0.131188 + "max": 0.00835, + "min": 0.004073, + "total": 0.083874 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.2117735, + "avg": 0.216974, "count": 2, - "max": 0.214678, - "min": 0.208869, - "total": 0.423547 + "max": 0.220943, + "min": 0.213005, + "total": 0.433948 }, "sirius::Radial_integrals|aug": { - "avg": 1.093219, + "avg": 1.082949, "count": 2, - "max": 1.239314, - "min": 0.947124, - "total": 2.186438 + "max": 1.200966, + "min": 0.964932, + "total": 2.165898 }, "sirius::Radial_integrals|beta": { - "avg": 0.23902, + "avg": 0.235486, "count": 2, - "max": 0.250511, - "min": 0.227529, - "total": 0.47804 + "max": 0.241457, + "min": 0.229515, + "total": 0.470972 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.054136000000000004, + "avg": 0.052349, "count": 2, - "max": 0.059128, - "min": 0.049144, - "total": 0.10827200000000001 + "max": 0.056054, + "min": 0.048644, + "total": 0.104698 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.0464, + "avg": 0.044477, "count": 1, - "max": 0.0464, - "min": 0.0464, - "total": 0.0464 + "max": 0.044477, + "min": 0.044477, + "total": 0.044477 }, "sirius::Radial_integrals|vloc": { - "avg": 0.16929450000000001, + "avg": 0.16681600000000002, "count": 2, - "max": 0.173874, - "min": 0.164715, - "total": 0.33858900000000003 + "max": 0.170378, + "min": 0.163254, + "total": 0.33363200000000004 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.000799, + "avg": 0.00093, "count": 1, - "max": 0.000799, - "min": 0.000799, - "total": 0.000799 + "max": 0.00093, + "min": 0.00093, + "total": 0.00093 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000634, + "avg": 0.00024, "count": 1, - "max": 0.000634, - "min": 0.000634, - "total": 
0.000634 + "max": 0.00024, + "min": 0.00024, + "total": 0.00024 }, "sirius::Simulation_context::init_fft": { - "avg": 0.136913, + "avg": 0.058863, "count": 1, - "max": 0.136913, - "min": 0.136913, - "total": 0.136913 + "max": 0.058863, + "min": 0.058863, + "total": 0.058863 }, "sirius::Simulation_context::initialize": { - "avg": 4.223186, + "avg": 4.018584, "count": 1, - "max": 4.223186, - "min": 4.223186, - "total": 4.223186 + "max": 4.018584, + "min": 4.018584, + "total": 4.018584 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.10471133333333332, + "avg": 0.0006656666666666667, "count": 6, - "max": 0.107326, - "min": 0.102338, - "total": 0.6282679999999999 + "max": 0.000962, + "min": 0.00035, + "total": 0.003994 }, "sirius::Simulation_context::update": { - "avg": 0.426644, + "avg": 0.328618, "count": 1, - "max": 0.426644, - "min": 0.426644, - "total": 0.426644 + "max": 0.328618, + "min": 0.328618, + "total": 0.328618 }, "sirius::Simulation_parameters::import": { - "avg": 0.000357, + "avg": 0.000192, "count": 1, - "max": 0.000357, - "min": 0.000357, - "total": 0.000357 + "max": 0.000192, + "min": 0.000192, + "total": 0.000192 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.003842483050847457, + "avg": 0.0006723305084745764, "count": 118, - "max": 0.006344, - "min": 0.001384, - "total": 0.45341299999999995 + "max": 0.003028, + "min": 0.000298, + "total": 0.07933500000000002 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 8.15e-05, + "avg": 6.65e-05, "count": 2, - "max": 8.6e-05, - "min": 7.7e-05, - "total": 0.000163 + "max": 6.7e-05, + "min": 6.6e-05, + "total": 0.000133 }, "sirius::Smooth_periodic_function|inner": { - "avg": 8.340677966101693e-05, + "avg": 8.654802259887007e-05, "count": 177, - "max": 0.000161, - "min": 5.8e-05, - "total": 0.014762999999999998 + "max": 0.00016, + "min": 6.3e-05, + "total": 0.015319000000000003 }, "sirius::Stress|ewald": { - "avg": 0.004718, + "avg": 0.00243, "count": 1, - 
"max": 0.004718, - "min": 0.004718, - "total": 0.004718 + "max": 0.00243, + "min": 0.00243, + "total": 0.00243 }, "sirius::Stress|har": { - "avg": 0.001918, + "avg": 0.000745, "count": 1, - "max": 0.001918, - "min": 0.001918, - "total": 0.001918 + "max": 0.000745, + "min": 0.000745, + "total": 0.000745 }, "sirius::Stress|kin": { - "avg": 0.001108, + "avg": 0.001454, "count": 1, - "max": 0.001108, - "min": 0.001108, - "total": 0.001108 + "max": 0.001454, + "min": 0.001454, + "total": 0.001454 }, "sirius::Stress|nonloc": { - "avg": 0.069113, + "avg": 0.071846, "count": 1, - "max": 0.069113, - "min": 0.069113, - "total": 0.069113 + "max": 0.071846, + "min": 0.071846, + "total": 0.071846 }, "sirius::Stress|us": { - "avg": 1.36738, + "avg": 0.975365, "count": 1, - "max": 1.36738, - "min": 1.36738, - "total": 1.36738 + "max": 0.975365, + "min": 0.975365, + "total": 0.975365 }, "sirius::Stress|us|gemm": { - "avg": 0.0032912592592592585, + "avg": 0.0031696296296296303, "count": 27, - "max": 0.005383, - "min": 0.002349, - "total": 0.08886399999999998 + "max": 0.00992, + "min": 0.002416, + "total": 0.08558000000000002 }, "sirius::Stress|us|phase_fac": { - "avg": 0.125557, + "avg": 0.00021733333333333335, "count": 3, - "max": 0.13091, - "min": 0.121465, - "total": 0.37667100000000003 + "max": 0.000337, + "min": 0.000157, + "total": 0.000652 }, "sirius::Stress|us|prepare": { - "avg": 0.0002542962962962963, + "avg": 0.00022974074074074075, "count": 27, - "max": 0.000594, - "min": 0.000149, - "total": 0.006866000000000001 + "max": 0.00065, + "min": 0.000158, + "total": 0.006203 }, "sirius::Stress|vloc": { - "avg": 0.211811, + "avg": 0.002302, "count": 1, - "max": 0.211811, - "min": 0.211811, - "total": 0.211811 + "max": 0.002302, + "min": 0.002302, + "total": 0.002302 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.00033350000000000003, + "avg": 0.00021349999999999999, "count": 2, - "max": 0.000477, - "min": 0.00019, - "total": 0.0006670000000000001 + "max": 
0.000287, + "min": 0.00014, + "total": 0.00042699999999999997 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.0061875, + "avg": 0.011754500000000001, "count": 2, - "max": 0.006865, - "min": 0.00551, - "total": 0.012375 + "max": 0.012343, + "min": 0.011166, + "total": 0.023509000000000002 }, "sirius::Unit_cell::initialize": { - "avg": 0.06699, + "avg": 0.069253, "count": 1, - "max": 0.06699, - "min": 0.06699, - "total": 0.06699 + "max": 0.069253, + "min": 0.069253, + "total": 0.069253 }, "sirius::Unit_cell::update": { - "avg": 0.006532, + "avg": 0.0119765, "count": 2, - "max": 0.007358, - "min": 0.005706, - "total": 0.013064 + "max": 0.012639, + "min": 0.011314, + "total": 0.023953 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.0061235, + "avg": 0.0117035, "count": 2, - "max": 0.006813, - "min": 0.005434, - "total": 0.012247 + "max": 0.012319, + "min": 0.011088, + "total": 0.023407 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.005892, + "avg": 0.0114765, "count": 2, - "max": 0.006554, - "min": 0.00523, - "total": 0.011784 + "max": 0.012058, + "min": 0.010895, + "total": 0.022953 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 6.75e-05, + "avg": 4.55e-05, "count": 2, - "max": 0.000109, - "min": 2.6e-05, - "total": 0.000135 + "max": 6.2e-05, + "min": 2.9e-05, + "total": 9.1e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.0001215, + "avg": 0.0001535, "count": 2, - "max": 0.000161, - "min": 8.2e-05, - "total": 0.000243 + "max": 0.000163, + "min": 0.000144, + "total": 0.000307 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 3e-05, + "avg": 1.7500000000000002e-05, "count": 2, - "max": 5e-05, + "max": 2.5e-05, "min": 1e-05, - "total": 6e-05 + "total": 3.5000000000000004e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.011304966666666666, + "avg": 0.0066991666666666675, "count": 30, - "max": 0.013973, - "min": 0.01074, - "total": 
0.339149 + "max": 0.009228, + "min": 0.006014, + "total": 0.20097500000000001 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.005425433333333333, + "avg": 0.005586066666666666, "count": 30, - "max": 0.007703, - "min": 0.005058, - "total": 0.162763 + "max": 0.007979, + "min": 0.004987, + "total": 0.16758199999999998 } } } \ No newline at end of file diff --git a/verification/test10/output_ref.json b/verification/test10/output_ref.json index 5e45c4a00..836ef0d15 100644 --- a/verification/test10/output_ref.json +++ b/verification/test10/output_ref.json @@ -4,7 +4,7 @@ "band_evp_work_count": 3271.105627642516, "local_operator_num_applied": 8645 }, - "git_hash": "2e3b15c5ba596aa7175f83b0cfdbc56168d6e822", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, @@ -13,646 +13,717 @@ "core_leakage": 0.0, "efermi": 0.7439107345140094, "energy": { - "bxc": -0.0016892959417840673, + "bxc": -0.001689295941783988, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -7.765880750422611, + "eval_sum": -7.765880750422609, "ewald": -107.33691180390963, - "exc": -24.931047145445085, - "kin": 44.350618949871304, - "total": -146.56680214553, - "veff": -52.11481040435212, - "vha": 50.00282012902576, - "vxc": -18.466758322818414 + "exc": -24.931047145445117, + "kin": 44.35061894987132, + "total": -146.56680214553003, + "veff": -52.11481040435214, + "vha": 50.00282012902583, + "vxc": -18.46675832281846 }, "fft_coarse_grid": [20,20,20], "fft_grid": [40,40,40], + "forces": [ + [1.8097592311194697e-09,4.576405625057385e-09,4.0090803451392055e-10] + ], "mpi_grid": [1,1], "num_atoms": 1, "num_bands": 38, "num_fv_states": -1, "num_scf_iterations": 13, "omega": 114.57850275, - "pw_cutoff": 20.0 + "pw_cutoff": 20.0, + "stress": [ + [0.06106468584106989,-6.508415422002945e-07,-3.3733747232491953e-10], + [-6.508415421556854e-07,0.06107273449131814,5.036938167067074e-10], + 
[-3.3733750438312887e-10,5.036937349692991e-10,0.06106354866871905] + ] }, "task": 0, "threads_per_rank": 8, "timers": { "Eigensolver_lapack|zheevr": { - "avg": 0.0018859351851851847, + "avg": 0.0019063611111111115, "count": 216, - "max": 0.003935, - "min": 0.000837, - "total": 0.4073619999999999 + "max": 0.003999, + "min": 0.000749, + "total": 0.4117740000000001 }, "Eigensolver_lapack|zhegvx": { - "avg": 0.0009548833333333332, + "avg": 0.0009374333333333336, "count": 120, - "max": 0.001234, - "min": 0.000816, - "total": 0.11458599999999998 + "max": 0.001671, + "min": 0.000822, + "total": 0.11249200000000004 }, "sddk::FFT3D::FFT3D": { - "avg": 0.003144, + "avg": 0.0034755, "count": 2, - "max": 0.004881, - "min": 0.001407, - "total": 0.006288 + "max": 0.005423, + "min": 0.001528, + "total": 0.006951 }, "sddk::FFT3D::prepare": { - "avg": 6.33244274809161e-05, + "avg": 6.60916030534351e-05, "count": 262, - "max": 0.000107, + "max": 0.000175, "min": 4.1e-05, - "total": 0.016591000000000015 + "total": 0.017315999999999995 }, "sddk::FFT3D::prepare|cpu": { - "avg": 5.832061068702289e-05, + "avg": 6.141221374045806e-05, "count": 262, - "max": 0.000101, - "min": 3.6e-05, - "total": 0.015279999999999998 + "max": 0.000163, + "min": 3.8e-05, + "total": 0.01609000000000001 }, "sddk::FFT3D::transform": { - "avg": 0.00030590444758207, - "count": 56660, - "max": 0.00378, - "min": 0.000102, - "total": 17.332546000000086 + "avg": 0.00014230135342590904, + "count": 56671, + "max": 0.001226, + "min": 8.5e-05, + "total": 8.064359999999692 }, "sddk::FFT3D::transform_xy": { - "avg": 6.124592304976683e-05, - "count": 56660, - "max": 0.000663, - "min": 3.6e-05, - "total": 3.4701939999997884 + "avg": 6.283783593018864e-05, + "count": 56671, + "max": 0.000777, + "min": 3.4e-05, + "total": 3.5610829999997207 }, "sddk::FFT3D::transform_z": { - "avg": 0.00023820510060006608, - "count": 56660, - "max": 0.003479, - "min": 5e-05, - "total": 13.496700999999744 + "avg": 7.378992782904985e-05, + 
"count": 56671, + "max": 0.000441, + "min": 3.7e-05, + "total": 4.181749000000083 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.00023498406283091724, - "count": 56660, - "max": 0.003476, - "min": 4.8e-05, - "total": 13.314196999999771 + "avg": 7.136244287201694e-05, + "count": 56671, + "max": 0.00042, + "min": 3.6e-05, + "total": 4.044181000000072 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0002315940698905707, - "count": 56660, - "max": 0.003472, - "min": 4.4e-05, - "total": 13.122119999999736 + "avg": 6.866968996488673e-05, + "count": 56671, + "max": 0.000383, + "min": 3.3e-05, + "total": 3.8915800000000957 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.0001299, + "avg": 0.00018324999999999998, "count": 20, - "max": 0.000684, - "min": 4.9e-05, - "total": 0.002598 + "max": 0.00074, + "min": 7.1e-05, + "total": 0.0036649999999999994 }, "sddk::Gvec::init": { - "avg": 0.0005598, + "avg": 0.0004822999999999999, "count": 10, - "max": 0.003802, - "min": 0.000154, - "total": 0.005598 + "max": 0.002616, + "min": 0.000144, + "total": 0.004822999999999999 }, "sddk::inner": { - "avg": 0.00011383896396396393, + "avg": 0.00011672860360360355, "count": 888, - "max": 0.000336, + "max": 0.000349, "min": 3e-06, - "total": 0.10108899999999997 + "total": 0.10365499999999996 }, "sddk::inner|local": { - "avg": 0.00011074324324324337, + "avg": 0.00011346734234234232, "count": 888, - "max": 0.000333, + "max": 0.000344, "min": 2e-06, - "total": 0.09834000000000011 + "total": 0.10075899999999997 }, "sddk::matrix_storage::matrix_storage": { - "avg": 8.91853932584268e-07, + "avg": 8.813202247190993e-07, "count": 1424, - "max": 1.5e-05, + "max": 1.6e-05, "min": 0.0, - "total": 0.0012699999999999977 + "total": 0.0012549999999999974 }, "sddk::matrix_storage::remap_backward": { - "avg": 7.931547619047681e-07, + "avg": 6.339285714285765e-07, "count": 672, "max": 7e-06, "min": 0.0, - "total": 0.0005330000000000042 + "total": 0.0004260000000000034 }, 
"sddk::matrix_storage::remap_forward": { - "avg": 3.415178571428556e-06, + "avg": 3.3102678571428454e-06, "count": 896, - "max": 1.2e-05, + "max": 2.3e-05, "min": 1e-06, - "total": 0.0030599999999999863 + "total": 0.0029659999999999895 }, "sddk::matrix_storage::set_num_extra": { - "avg": 9.649234693877503e-07, + "avg": 8.20790816326529e-07, "count": 1568, - "max": 4e-06, + "max": 2e-05, "min": 0.0, - "total": 0.0015129999999999924 + "total": 0.0012869999999999973 }, "sddk::orthogonalize": { - "avg": 0.0005052731481481482, + "avg": 0.000532824074074074, "count": 216, - "max": 0.001127, - "min": 0.000117, - "total": 0.10913900000000001 + "max": 0.001281, + "min": 0.000107, + "total": 0.11508999999999998 }, "sddk::orthogonalize|tmtrx": { - "avg": 3.520833333333339e-05, + "avg": 3.724537037037042e-05, "count": 216, - "max": 0.000156, + "max": 0.000181, "min": 1e-06, - "total": 0.007605000000000012 + "total": 0.00804500000000001 }, "sddk::orthogonalize|transform": { - "avg": 6.465740740740744e-05, + "avg": 6.712962962962968e-05, "count": 216, - "max": 0.0002, + "max": 0.000227, "min": 4e-06, - "total": 0.013966000000000006 + "total": 0.014500000000000011 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.003425, + "avg": 0.001768, "count": 1, - "max": 0.003425, - "min": 0.003425, - "total": 0.003425 + "max": 0.001768, + "min": 0.001768, + "total": 0.001768 }, "sddk::transform": { - "avg": 0.0001788888888888889, + "avg": 0.0001888727598566311, "count": 558, - "max": 0.000561, - "min": 5.5e-05, - "total": 0.09982000000000002 + "max": 0.000589, + "min": 5.6e-05, + "total": 0.10539100000000015 }, "sddk::transform|init": { - "avg": 2.0781362007168467e-05, + "avg": 2.0492831541218657e-05, "count": 558, - "max": 0.000308, + "max": 0.000203, "min": 1e-06, - "total": 0.011596000000000004 + "total": 0.011435000000000011 }, "sddk::transform|local": { - "avg": 6.977310231023118e-05, + "avg": 7.433993399339938e-05, "count": 1212, - "max": 0.000288, - "min": 1.6e-05, - "total": 
0.0845650000000002 + "max": 0.000512, + "min": 1.5e-05, + "total": 0.09010000000000006 }, "sirius::Atom_type::init": { - "avg": 0.006034, + "avg": 0.009748, "count": 1, - "max": 0.006034, - "min": 0.006034, - "total": 0.006034 + "max": 0.009748, + "min": 0.009748, + "total": 0.009748 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.554318, + "count": 1, + "max": 0.554318, + "min": 0.554318, + "total": 0.554318 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.1840236785714288, + "avg": 0.11394766964285716, "count": 112, - "max": 0.384474, - "min": 0.094066, - "total": 20.610652000000023 + "max": 0.238267, + "min": 0.054996, + "total": 12.762139000000001 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 3.5758928571428596e-05, + "avg": 3.689285714285716e-05, "count": 112, - "max": 0.0001, - "min": 2.9e-05, - "total": 0.0040050000000000025 + "max": 7.3e-05, + "min": 3e-05, + "total": 0.004132000000000002 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.0015721402439024378, + "avg": 0.001579085365853659, "count": 328, - "max": 0.003938, - "min": 0.000819, - "total": 0.5156619999999996 + "max": 0.004004, + "min": 0.000752, + "total": 0.5179400000000002 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.1821559821428571, + "avg": 0.11216968750000005, "count": 112, - "max": 0.381589, - "min": 0.091958, - "total": 20.401469999999996 + "max": 0.236765, + "min": 0.053283, + "total": 12.563005000000006 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.00014790178571428569, + "avg": 0.00015933928571428569, "count": 112, - "max": 0.000309, - "min": 9.8e-05, - "total": 0.016564999999999996 + "max": 0.000571, + "min": 8.7e-05, + "total": 0.017845999999999997 }, "sirius::Band::initialize_subspace": { - "avg": 0.836618, + "avg": 0.42515, "count": 1, - "max": 0.836618, - "min": 0.836618, - "total": 0.836618 + "max": 0.42515, + "min": 0.42515, + "total": 0.42515 
}, "sirius::Band::initialize_subspace|kp": { - "avg": 0.10427625, + "avg": 0.053024375, "count": 8, - "max": 0.110564, - "min": 0.097259, - "total": 0.83421 + "max": 0.055566, + "min": 0.044235, + "total": 0.424195 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.020775375, + "avg": 0.000322375, "count": 8, - "max": 0.022058, - "min": 0.01934, - "total": 0.166203 + "max": 0.000389, + "min": 0.000294, + "total": 0.002579 }, "sirius::Band::residuals": { - "avg": 0.0005077225609756102, + "avg": 0.0005241006097560976, "count": 328, - "max": 0.001191, + "max": 0.001046, "min": 0.0, - "total": 0.16653300000000015 + "total": 0.171905 }, "sirius::Band::residuals_aux": { - "avg": 0.0005822072072072074, + "avg": 0.0006035540540540543, "count": 222, - "max": 0.000704, - "min": 0.000486, - "total": 0.12925000000000003 + "max": 0.000793, + "min": 0.000505, + "total": 0.13398900000000005 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.0002607543859649123, + "avg": 0.0002686469298245614, "count": 456, - "max": 0.000512, - "min": 0.000131, - "total": 0.11890400000000001 + "max": 0.000579, + "min": 0.000129, + "total": 0.122503 }, "sirius::Band::solve": { - "avg": 1.4752307142857135, + "avg": 0.9134030714285715, "count": 14, - "max": 2.790815, - "min": 0.844216, - "total": 20.65322999999999 + "max": 1.7911300000000001, + "min": 0.457184, + "total": 12.787643000000001 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000479375, + "avg": 0.000643125, "count": 8, - "max": 0.000676, - "min": 0.00041, - "total": 0.003835 + "max": 0.000805, + "min": 0.000533, + "total": 0.005145 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.000345125, + "avg": 0.000435875, "count": 8, - "max": 0.000531, - "min": 0.00027, - "total": 0.002761 + "max": 0.000524, + "min": 0.000346, + "total": 0.003487 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 3.189655172413788e-07, - "count": 232, + "avg": 2.9924242424242377e-07, + "count": 264, "max": 1e-06, "min": 0.0, 
- "total": 7.399999999999989e-05 + "total": 7.899999999999987e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.000124125, - "count": 8, - "max": 0.000132, - "min": 0.000115, - "total": 0.000993 + "avg": 5.970192307692311e-05, + "count": 104, + "max": 0.00029, + "min": 3.5e-05, + "total": 0.006209000000000004 }, "sirius::Beta_projectors_base::inner": { - "avg": 6.942299107142844e-05, - "count": 896, - "max": 0.000251, + "avg": 7.008392857142838e-05, + "count": 1120, + "max": 0.000348, "min": 1.3e-05, - "total": 0.06220299999999988 + "total": 0.07849399999999979 }, "sirius::Beta_projectors_base::local_inner_aux": { - "avg": 6.624441964285708e-05, - "count": 896, - "max": 0.000242, - "min": 1e-05, - "total": 0.05935499999999994 + "avg": 6.678749999999965e-05, + "count": 1120, + "max": 0.000339, + "min": 1.1e-05, + "total": 0.0748019999999996 + }, + "sirius::Beta_projectors_base::prepare": { + "avg": 1.5000000000000005e-06, + "count": 16, + "max": 8e-06, + "min": 1e-06, + "total": 2.4000000000000007e-05 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.0010535, + "count": 8, + "max": 0.001258, + "min": 0.000904, + "total": 0.008428 }, "sirius::Broyden1::mix": { - "avg": 0.0016773571428571432, + "avg": 0.0016656428571428569, "count": 14, - "max": 0.002895, - "min": 6.4e-05, - "total": 0.023483000000000004 + "max": 0.003024, + "min": 5.9e-05, + "total": 0.023318999999999996 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.030752, + "avg": 0.000374, "count": 1, - "max": 0.030752, - "min": 0.030752, - "total": 0.030752 + "max": 0.000374, + "min": 0.000374, + "total": 0.000374 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 23.348227, + "avg": 13.947341, "count": 1, - "max": 23.348227, - "min": 23.348227, - "total": 23.348227 + "max": 13.947341, + "min": 13.947341, + "total": 13.947341 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 1.6669045714285715, + "avg": 0.9956277142857142, "count": 14, - "max": 
2.974037, - "min": 1.055617, - "total": 23.336664000000003 + "max": 1.871974, + "min": 0.5395759999999999, + "total": 13.938787999999999 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0007078839285714287, + "avg": 0.000688464285714286, "count": 112, - "max": 0.000976, - "min": 0.00063, - "total": 0.07928300000000002 + "max": 0.000847, + "min": 0.000607, + "total": 0.07710800000000002 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.014206125000000009, + "avg": 0.007582732142857145, "count": 112, - "max": 0.01892, - "min": 0.01228, - "total": 1.5910860000000009 + "max": 0.008118, + "min": 0.007089, + "total": 0.8492660000000003 }, "sirius::Density::augment": { - "avg": 2.1428571428571428e-07, + "avg": 7.142857142857143e-07, "count": 14, "max": 1e-06, "min": 0.0, - "total": 3e-06 + "total": 1e-05 }, "sirius::Density::compute_atomic_mag_mom": { - "avg": 0.000157, + "avg": 0.000162, "count": 1, - "max": 0.000157, - "min": 0.000157, - "total": 0.000157 + "max": 0.000162, + "min": 0.000162, + "total": 0.000162 }, "sirius::Density::generate": { - "avg": 0.12219721428571428, + "avg": 0.06718007142857142, "count": 14, - "max": 0.140708, - "min": 0.115126, - "total": 1.710761 + "max": 0.068968, + "min": 0.065356, + "total": 0.9405209999999999 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.033005, + "avg": 0.000912, "count": 1, - "max": 0.033005, - "min": 0.033005, - "total": 0.033005 + "max": 0.000912, + "min": 0.000912, + "total": 0.000912 }, "sirius::Density::generate_valence": { - "avg": 0.122193, + "avg": 0.06717607142857143, "count": 14, - "max": 0.140703, - "min": 0.115122, - "total": 1.710702 + "max": 0.068964, + "min": 0.065349, + "total": 0.940465 }, "sirius::Density::initial_density": { - "avg": 0.042386, + "avg": 0.004634, "count": 1, - "max": 0.042386, - "min": 0.042386, - "total": 0.042386 + "max": 0.004634, + "min": 0.004634, + "total": 0.004634 }, "sirius::Density::update": { - "avg": 0.03302, + "avg": 
0.000928, + "count": 1, + "max": 0.000928, + "min": 0.000928, + "total": 0.000928 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.000654, + "count": 1, + "max": 0.000654, + "min": 0.000654, + "total": 0.000654 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.000461, + "count": 1, + "max": 0.000461, + "min": 0.000461, + "total": 0.000461 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.05182, + "count": 1, + "max": 0.05182, + "min": 0.05182, + "total": 0.05182 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.00023, "count": 1, - "max": 0.03302, - "min": 0.03302, - "total": 0.03302 + "max": 0.00023, + "min": 0.00023, + "total": 0.00023 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.001716, + "count": 1, + "max": 0.001716, + "min": 0.001716, + "total": 0.001716 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.00043, + "count": 1, + "max": 0.00043, + "min": 0.00043, + "total": 0.00043 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.0598818035714286, + "avg": 0.035763604166666664, "count": 336, - "max": 0.11032, - "min": 0.00275, - "total": 20.12028600000001 + "max": 0.059098, + "min": 0.001916, + "total": 12.016570999999999 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.001787116071428571, + "avg": 0.0016532767857142855, "count": 112, - "max": 0.002897, - "min": 0.001553, - "total": 0.20015699999999995 + "max": 0.002645, + "min": 0.001363, + "total": 0.18516699999999997 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 1.7705357142857156e-05, + "avg": 5.160714285714287e-06, "count": 112, - "max": 2.8e-05, - "min": 1.5e-05, - "total": 0.0019830000000000013 + "max": 1.2e-05, + "min": 3e-06, + "total": 0.0005780000000000002 }, "sirius::K_point::K_point": { "avg": 7.499999999999999e-07, "count": 8, - "max": 2e-06, + "max": 3e-06, "min": 0.0, "total": 5.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.00019637500000000002, + "avg": 0.0002265, "count": 8, - "max": 0.000213, - "min": 0.000188, - "total": 
0.0015710000000000001 + "max": 0.000327, + "min": 0.000152, + "total": 0.001812 }, "sirius::K_point::initialize": { - "avg": 0.0008212499999999999, + "avg": 0.0010252500000000001, "count": 8, - "max": 0.001054, - "min": 0.00074, - "total": 0.0065699999999999995 + "max": 0.001351, + "min": 0.000886, + "total": 0.008202000000000001 }, "sirius::K_point::update": { - "avg": 0.000578875, + "avg": 0.000772875, "count": 8, - "max": 0.000784, - "min": 0.000506, - "total": 0.004631 + "max": 0.00101, + "min": 0.000661, + "total": 0.006183 }, "sirius::K_point_set::add_kpoint": { - "avg": 3.875e-06, + "avg": 3.7500000000000005e-06, "count": 8, - "max": 1.4e-05, + "max": 1.3e-05, "min": 2e-06, - "total": 3.1e-05 + "total": 3.0000000000000004e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.0069, + "avg": 0.008347, "count": 1, - "max": 0.0069, - "min": 0.0069, - "total": 0.0069 + "max": 0.008347, + "min": 0.008347, + "total": 0.008347 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.0003719285714285714, + "avg": 0.00038271428571428576, "count": 14, - "max": 0.000459, - "min": 0.000305, - "total": 0.005207 + "max": 0.000491, + "min": 0.000329, + "total": 0.005358000000000001 }, "sirius::K_point_set::initialize": { - "avg": 0.006846, + "avg": 0.00828, "count": 1, - "max": 0.006846, - "min": 0.006846, - "total": 0.006846 + "max": 0.00828, + "min": 0.00828, + "total": 0.00828 }, "sirius::K_point_set::sync_band_energies": { - "avg": 1.1357142857142856e-05, + "avg": 8.285714285714284e-06, "count": 14, - "max": 1.9e-05, - "min": 1e-05, - "total": 0.000159 + "max": 2.7e-05, + "min": 6e-06, + "total": 0.00011599999999999997 }, "sirius::Local_operator::apply_h": { - "avg": 0.05883834523809527, + "avg": 0.034684812499999974, "count": 336, - "max": 0.108738, - "min": 0.002103, - "total": 19.769684000000012 + "max": 0.057538, + "min": 0.001285, + "total": 11.654096999999991 }, "sirius::Local_operator::prepare": { - "avg": 0.0002600444444444448, + "avg": 
0.00011677777777777774, "count": 135, - "max": 0.002526, + "max": 0.0011, "min": 6e-06, - "total": 0.03510600000000005 + "total": 0.015764999999999994 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 9.66666666666667e-07, + "avg": 1.2666666666666667e-06, "count": 30, "max": 2e-06, "min": 0.0, - "total": 2.900000000000001e-05 + "total": 3.8e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.0001060513392857147, + "avg": 0.00010982291666666645, "count": 2688, - "max": 0.000232, - "min": 5.7e-05, - "total": 0.2850660000000011 + "max": 0.000254, + "min": 5.8e-05, + "total": 0.2952039999999994 }, "sirius::Periodic_function::add": { - "avg": 5.9666666666666656e-05, + "avg": 6.416666666666666e-05, "count": 30, - "max": 8.9e-05, - "min": 4.7e-05, - "total": 0.0017899999999999997 + "max": 0.000118, + "min": 4.8e-05, + "total": 0.001925 }, "sirius::Periodic_function::inner": { - "avg": 6.397297297297305e-05, - "count": 296, - "max": 0.000146, - "min": 4.9e-05, - "total": 0.018936000000000026 + "avg": 7.306711409395974e-05, + "count": 298, + "max": 0.000175, + "min": 5.2e-05, + "total": 0.021774 }, "sirius::Periodic_function::integrate": { - "avg": 5.745614035087719e-05, + "avg": 6.829824561403507e-05, "count": 57, - "max": 9e-05, - "min": 4.9e-05, - "total": 0.0032749999999999997 + "max": 0.000112, + "min": 5.6e-05, + "total": 0.003892999999999999 }, "sirius::Potential::Potential": { - "avg": 0.037137, + "avg": 0.004347, "count": 1, - "max": 0.037137, - "min": 0.037137, - "total": 0.037137 + "max": 0.004347, + "min": 0.004347, + "total": 0.004347 }, "sirius::Potential::generate": { - "avg": 0.04679086666666667, + "avg": 0.006515333333333333, "count": 15, - "max": 0.055036, - "min": 0.043207, - "total": 0.701863 + "max": 0.008232, + "min": 0.005744, + "total": 0.09773 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 7.659999999999999e-05, + "avg": 9.086666666666669e-05, "count": 15, - "max": 8.6e-05, - "min": 6.9e-05, - "total": 
0.0011489999999999998 + "max": 0.000126, + "min": 7.6e-05, + "total": 0.0013630000000000003 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 3.9999999999999993e-07, + "avg": 4.666666666666666e-07, "count": 15, "max": 1e-06, "min": 0.0, - "total": 5.999999999999999e-06 + "total": 6.999999999999999e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.034529, + "avg": 0.001552, "count": 1, - "max": 0.034529, - "min": 0.034529, - "total": 0.034529 + "max": 0.001552, + "min": 0.001552, + "total": 0.001552 }, "sirius::Potential::poisson": { - "avg": 0.03449580000000001, + "avg": 0.0006686, "count": 15, - "max": 0.040767, - "min": 0.031954, - "total": 0.5174370000000001 + "max": 0.000808, + "min": 0.000608, + "total": 0.010029 }, "sirius::Potential::update": { - "avg": 0.034544, + "avg": 0.001566, "count": 1, - "max": 0.034544, - "min": 0.034544, - "total": 0.034544 + "max": 0.001566, + "min": 0.001566, + "total": 0.001566 }, "sirius::Potential::xc": { - "avg": 0.0037481999999999993, + "avg": 0.0039214666666666665, "count": 15, - "max": 0.005021, - "min": 0.003224, - "total": 0.05622299999999999 + "max": 0.005162, + "min": 0.003288, + "total": 0.058822 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.0037408666666666666, + "avg": 0.003914066666666666, "count": 15, - "max": 0.005014, - "min": 0.003217, - "total": 0.056112999999999996 + "max": 0.005153, + "min": 0.003282, + "total": 0.058711 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.0010184666666666665, + "avg": 0.0011012, "count": 15, - "max": 0.001526, - "min": 0.000905, - "total": 0.015276999999999999 + "max": 0.001725, + "min": 0.000922, + "total": 0.016518 }, "sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.0004960666666666666, + "avg": 0.0005154666666666667, "count": 15, - "max": 0.000579, - "min": 0.00045, - "total": 0.007441 + "max": 0.000804, + "min": 0.00043, + "total": 0.007732 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 
0.10427249999999999, + "avg": 0.104901, "count": 2, - "max": 0.106362, - "min": 0.102183, - "total": 0.20854499999999998 + "max": 0.107918, + "min": 0.101884, + "total": 0.209802 }, "sirius::Radial_integrals|aug": { "avg": 2.4999999999999998e-06, @@ -662,123 +733,165 @@ "total": 4.9999999999999996e-06 }, "sirius::Radial_integrals|beta": { - "avg": 0.1180045, + "avg": 0.11381150000000001, "count": 2, - "max": 0.125556, - "min": 0.110453, - "total": 0.236009 + "max": 0.118215, + "min": 0.109408, + "total": 0.22762300000000002 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.0327195, + "avg": 0.0332055, "count": 2, - "max": 0.033043, - "min": 0.032396, - "total": 0.065439 + "max": 0.035893, + "min": 0.030518, + "total": 0.066411 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.025998, + "avg": 0.026875, "count": 1, - "max": 0.025998, - "min": 0.025998, - "total": 0.025998 + "max": 0.026875, + "min": 0.026875, + "total": 0.026875 }, "sirius::Radial_integrals|vloc": { - "avg": 0.101684, + "avg": 0.10631650000000001, "count": 2, - "max": 0.109035, - "min": 0.094333, - "total": 0.203368 + "max": 0.11049, + "min": 0.102143, + "total": 0.21263300000000002 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.001128, + "avg": 0.001189, "count": 1, - "max": 0.001128, - "min": 0.001128, - "total": 0.001128 + "max": 0.001189, + "min": 0.001189, + "total": 0.001189 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000555, + "avg": 0.000244, "count": 1, - "max": 0.000555, - "min": 0.000555, - "total": 0.000555 + "max": 0.000244, + "min": 0.000244, + "total": 0.000244 }, "sirius::Simulation_context::init_fft": { - "avg": 0.046833, + "avg": 0.012272, "count": 1, - "max": 0.046833, - "min": 0.046833, - "total": 0.046833 + "max": 0.012272, + "min": 0.012272, + "total": 0.012272 }, "sirius::Simulation_context::initialize": { - "avg": 0.831921, + "avg": 0.770458, "count": 1, - "max": 0.831921, - "min": 0.831921, - "total": 0.831921 + "max": 
0.770458, + "min": 0.770458, + "total": 0.770458 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.03110833333333333, - "count": 3, - "max": 0.031758, - "min": 0.030675, - "total": 0.09332499999999999 + "avg": 0.00016283333333333332, + "count": 6, + "max": 0.000209, + "min": 0.000129, + "total": 0.000977 }, "sirius::Simulation_context::update": { - "avg": 0.037131, + "avg": 0.003056, "count": 1, - "max": 0.037131, - "min": 0.037131, - "total": 0.037131 + "max": 0.003056, + "min": 0.003056, + "total": 0.003056 }, "sirius::Simulation_parameters::import": { - "avg": 0.000264, + "avg": 0.000166, "count": 1, - "max": 0.000264, - "min": 0.000264, - "total": 0.000264 + "max": 0.000166, + "min": 0.000166, + "total": 0.000166 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.0015615967741935488, - "count": 310, - "max": 0.003891, - "min": 0.000352, - "total": 0.4840950000000001 + "avg": 0.00036431464174454826, + "count": 321, + "max": 0.001283, + "min": 0.00011, + "total": 0.116945 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 2.6750000000000003e-05, + "avg": 1.6374999999999998e-05, "count": 8, - "max": 3.2e-05, - "min": 2.5e-05, - "total": 0.00021400000000000002 + "max": 2e-05, + "min": 1.4e-05, + "total": 0.00013099999999999999 }, "sirius::Smooth_periodic_function|inner": { - "avg": 6.178235294117646e-05, - "count": 340, - "max": 0.000144, - "min": 4.7e-05, - "total": 0.021005999999999997 + "avg": 7.007580174927122e-05, + "count": 343, + "max": 0.000174, + "min": 5.1e-05, + "total": 0.02403600000000003 + }, + "sirius::Stress|ewald": { + "avg": 0.0006489999999999999, + "count": 1, + "max": 0.0006489999999999999, + "min": 0.0006489999999999999, + "total": 0.0006489999999999999 + }, + "sirius::Stress|har": { + "avg": 0.000219, + "count": 1, + "max": 0.000219, + "min": 0.000219, + "total": 0.000219 + }, + "sirius::Stress|kin": { + "avg": 0.001574, + "count": 1, + "max": 0.001574, + "min": 0.001574, + "total": 0.001574 + }, 
+ "sirius::Stress|nonloc": { + "avg": 0.164595, + "count": 1, + "max": 0.164595, + "min": 0.164595, + "total": 0.164595 + }, + "sirius::Stress|us": { + "avg": 0.556771, + "count": 1, + "max": 0.556771, + "min": 0.556771, + "total": 0.556771 + }, + "sirius::Stress|vloc": { + "avg": 0.000547, + "count": 1, + "max": 0.000547, + "min": 0.000547, + "total": 0.000547 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.00025049999999999996, + "avg": 0.0002515, "count": 2, - "max": 0.000345, - "min": 0.000156, - "total": 0.0005009999999999999 + "max": 0.000388, + "min": 0.000115, + "total": 0.000503 }, "sirius::Unit_cell::initialize": { - "avg": 0.006506, + "avg": 0.010308, "count": 1, - "max": 0.006506, - "min": 0.006506, - "total": 0.006506 + "max": 0.010308, + "min": 0.010308, + "total": 0.010308 }, "sirius::Unit_cell::update": { - "avg": 0.000257, + "avg": 0.0002705, "count": 2, - "max": 0.000353, - "min": 0.000161, - "total": 0.000514 + "max": 0.000395, + "min": 0.000146, + "total": 0.000541 } } } \ No newline at end of file diff --git a/verification/test10/sirius.json b/verification/test10/sirius.json index 3d0dd3fd5..94fb0e644 100644 --- a/verification/test10/sirius.json +++ b/verification/test10/sirius.json @@ -6,7 +6,9 @@ "verbosity" : 1, "print_checksum" : false, "verification" : 0, - "cyclic block size" : 2 + "cyclic block size" : 2, + "print_forces" : true, + "print_stress" : true }, "iterative_solver" : { "tolerance" : 1e-5, diff --git a/verification/test11/output_ref.json b/verification/test11/output_ref.json index 5a40845fe..ad5c19f66 100644 --- a/verification/test11/output_ref.json +++ b/verification/test11/output_ref.json @@ -1,505 +1,603 @@ { - "build_date": "Thu, 6 Sep 2018 09:50:51", "comm_world_size": 1, "counters": { - "band_evp_work_count": 2197.4909629629647, - "local_operator_num_applied": 5381 + "band_evp_work_count": 2045.4444444444437, + "local_operator_num_applied": 5121 }, - "git_hash": "df65c9ae1af51c668b8f0cc9e4ea1e1bb142f720", + 
"git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "Au", "converged": true, "core_leakage": 0.0, - "efermi": 0.5653634339426149, + "efermi": 0.5653634406119041, "energy": { - "bxc": -0.002787164893735792, + "bxc": -0.002787162841643533, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": 4.857604853991188, - "ewald": -35.97719221320382, - "exc": -15.777622309873639, - "kin": 8.818177208529077, - "total": -42.28567059413017, - "veff": -3.957785189644153, - "vha": 7.56539468349988, - "vxc": -8.391449251812306 - }, - "fft_coarse_grid": [30,30,30], + "eval_sum": 4.8576049728725454, + "ewald": -35.977191952645654, + "exc": -15.777622358081999, + "kin": 8.81817725651116, + "total": -42.285670333534895, + "veff": -3.9577851207969696, + "vha": 7.5653949188528475, + "vxc": -8.391449300904997 + }, + "fft_coarse_grid": [32,32,32], "fft_grid": [48,48,48], + "forces": [ + [-1.6719728498510342e-08,1.184877287252558e-08,1.6260295935050367e-08] + ], "mpi_grid": [1,1], "num_atoms": 1, "num_bands": 30, "num_fv_states": -1, "num_scf_iterations": 10, "omega": 114.57850275, - "pw_cutoff": 26.4575 + "pw_cutoff": 26.4575, + "stress": [ + [0.10072069130721512,1.9430841783500356e-05,-5.000717232166086e-13], + [3.454976337735086e-05,0.10068564454443457,-4.534565559864681e-14], + [-5.481220024856852e-13,-1.2994622722705077e-13,0.10065822769738353] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.0016599176470588237, - "count": 170, - "max": 0.005365, - "min": 0.000573, - "total": 0.28218600000000005 - }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.0016463470588235288, - "count": 170, - "max": 0.005341, - "min": 0.000566, - "total": 0.2798789999999999 + "Eigensolver_lapack|zheevr": { + "avg": 0.0010764113924050635, + "count": 158, + "max": 0.001917, + "min": 0.000515, + "total": 0.17007300000000003 + }, + "Eigensolver_lapack|zhegvx": { + "avg": 
0.0006151458333333335, + "count": 96, + "max": 0.001315, + "min": 0.000499, + "total": 0.059054000000000016 }, "sddk::FFT3D::FFT3D": { - "avg": 0.00294, + "avg": 0.0042375, "count": 2, - "max": 0.004294, - "min": 0.001586, - "total": 0.00588 + "max": 0.007108, + "min": 0.001367, + "total": 0.008475 }, "sddk::FFT3D::prepare": { - "avg": 7.836057692307686e-05, + "avg": 6.658173076923079e-05, "count": 208, - "max": 0.000284, - "min": 3.9e-05, - "total": 0.016298999999999987 + "max": 0.000173, + "min": 4.6e-05, + "total": 0.013849000000000005 }, "sddk::FFT3D::prepare|cpu": { - "avg": 7.435576923076925e-05, + "avg": 6.156249999999993e-05, "count": 208, - "max": 0.00028, - "min": 3.7e-05, - "total": 0.015466000000000004 + "max": 0.000146, + "min": 4.2e-05, + "total": 0.012804999999999985 }, "sddk::FFT3D::transform": { - "avg": 0.0008753119642083964, - "count": 34645, - "max": 0.006807, - "min": 0.000309, - "total": 30.325182999999896 + "avg": 0.00017770869591491787, + "count": 33096, + "max": 0.001683, + "min": 0.000122, + "total": 5.881447000000122 }, "sddk::FFT3D::transform_xy": { - "avg": 0.0001823263385770011, - "count": 34645, - "max": 0.000878, - "min": 0.000145, - "total": 6.316696000000203 + "avg": 8.384019216823969e-05, + "count": 33096, + "max": 0.001135, + "min": 5.6e-05, + "total": 2.774775000000061 }, "sddk::FFT3D::transform_z": { - "avg": 0.0006866940106797632, - "count": 34645, - "max": 0.006081, - "min": 0.000124, - "total": 23.790514000000396 + "avg": 8.775966884215767e-05, + "count": 33096, + "max": 0.000758, + "min": 4.7e-05, + "total": 2.9044940000000503 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0006837543945735358, - "count": 34645, - "max": 0.006076, - "min": 0.000121, - "total": 23.68867100000015 + "avg": 8.519609620498154e-05, + "count": 33096, + "max": 0.000755, + "min": 4.6e-05, + "total": 2.819650000000069 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0006803990474815962, - "count": 34645, - "max": 0.006069, - "min": 0.00012, 
- "total": 23.5724249999999 + "avg": 8.224519579405525e-05, + "count": 33096, + "max": 0.000747, + "min": 4.3e-05, + "total": 2.721987000000053 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.0003481499999999999, + "avg": 0.0003995499999999999, "count": 20, - "max": 0.001427, - "min": 0.000124, - "total": 0.006962999999999999 + "max": 0.001637, + "min": 0.000193, + "total": 0.007990999999999998 }, "sddk::Gvec::init": { - "avg": 0.0015316, + "avg": 0.0010856000000000001, "count": 10, - "max": 0.007979, - "min": 0.000567, - "total": 0.015316 - }, - "sddk::Wave_functions::inner": { - "avg": 0.002523252136752138, - "count": 702, - "max": 0.011954, - "min": 4e-06, - "total": 1.7713230000000009 - }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.01145314705882353, - "count": 170, - "max": 0.047343, - "min": 0.000597, - "total": 1.947035 - }, - "sddk::Wave_functions::transform": { - "avg": 0.005537615221987314, - "count": 473, - "max": 0.027974, - "min": 0.000227, - "total": 2.6192919999999997 - }, - "sddk::Wave_functions::transform|init": { - "avg": 0.00021203805496828755, - "count": 473, - "max": 0.002178, - "min": 4e-06, - "total": 0.10029400000000001 + "max": 0.005383, + "min": 0.000443, + "total": 0.010856000000000001 + }, + "sddk::inner": { + "avg": 0.0003828828828828829, + "count": 666, + "max": 0.001207, + "min": 1e-05, + "total": 0.255 + }, + "sddk::inner|local": { + "avg": 0.0003794219219219223, + "count": 666, + "max": 0.001194, + "min": 8e-06, + "total": 0.2526950000000002 }, "sddk::matrix_storage::matrix_storage": { - "avg": 8.741197183098244e-06, + "avg": 5.6531690140845584e-06, "count": 1136, - "max": 0.000658, + "max": 9e-05, "min": 0.0, - "total": 0.009929999999999604 + "total": 0.006422000000000059 }, "sddk::matrix_storage::remap_backward": { - "avg": 5.582706766917304e-07, - "count": 532, - "max": 6e-06, + "avg": 7.0669291338583e-07, + "count": 508, + "max": 1.7e-05, "min": 0.0, - "total": 0.00029700000000000055 + "total": 0.0003590000000000017 
}, "sddk::matrix_storage::remap_forward": { - "avg": 2.1807909604519727e-06, - "count": 708, - "max": 3.6e-05, + "avg": 3.508771929824567e-06, + "count": 684, + "max": 1.3e-05, + "min": 1e-06, + "total": 0.0024000000000000037 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 9.169463087248414e-07, + "count": 1192, + "max": 1e-05, "min": 0.0, - "total": 0.0015439999999999968 + "total": 0.0010930000000000108 + }, + "sddk::orthogonalize": { + "avg": 0.0012394810126582277, + "count": 158, + "max": 0.002534, + "min": 0.000294, + "total": 0.19583799999999998 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 1.9588607594936708e-05, + "count": 158, + "max": 9.8e-05, + "min": 2e-06, + "total": 0.0030949999999999997 + }, + "sddk::orthogonalize|transform": { + "avg": 0.0001560379746835443, + "count": 158, + "max": 0.000537, + "min": 1.7e-05, + "total": 0.024654 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.00859, + "avg": 0.003896, "count": 1, - "max": 0.00859, - "min": 0.00859, - "total": 0.00859 + "max": 0.003896, + "min": 0.003896, + "total": 0.003896 + }, + "sddk::transform": { + "avg": 0.0004095657276995304, + "count": 426, + "max": 0.001267, + "min": 7.3e-05, + "total": 0.17447499999999994 + }, + "sddk::transform|init": { + "avg": 6.992957746478882e-05, + "count": 426, + "max": 0.000685, + "min": 4e-06, + "total": 0.02979000000000004 + }, + "sddk::transform|local": { + "avg": 0.00015501641137855574, + "count": 914, + "max": 0.00073, + "min": 2.8e-05, + "total": 0.14168499999999995 }, "sirius::Atom_type::init": { - "avg": 0.106941, + "avg": 0.092464, "count": 1, - "max": 0.106941, - "min": 0.106941, - "total": 0.106941 + "max": 0.092464, + "min": 0.092464, + "total": 0.092464 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.234789, + "avg": 0.183745, "count": 1, - "max": 0.234789, - "min": 0.234789, - "total": 0.234789 + "max": 0.183745, + "min": 0.183745, + "total": 0.183745 + }, + 
"sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.2478486666666667, + "count": 12, + "max": 0.253175, + "min": 0.243585, + "total": 2.974184 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.236982, + "count": 12, + "max": 0.24702, + "min": 0.231516, + "total": 2.843784 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.064144, + "count": 1, + "max": 0.064144, + "min": 0.064144, + "total": 0.064144 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.43890243181818195, + "avg": 0.11474322727272722, "count": 88, - "max": 1.189172, - "min": 0.204506, - "total": 38.62341400000001 + "max": 0.236629, + "min": 0.059651, + "total": 10.097403999999996 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 0.00010889772727272727, + "avg": 0.0001001022727272727, "count": 88, - "max": 0.000333, - "min": 7.2e-05, - "total": 0.009583 + "max": 0.000206, + "min": 5e-05, + "total": 0.008808999999999997 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.001319709302325581, - "count": 258, - "max": 0.005372, - "min": 0.000511, - "total": 0.3404849999999999 + "avg": 0.0009114065040650406, + "count": 246, + "max": 0.00192, + "min": 0.000502, + "total": 0.224206 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.4303812500000001, + "avg": 0.1086283863636364, "count": 88, - "max": 1.178437, - "min": 0.197053, - "total": 37.87355000000001 + "max": 0.230554, + "min": 0.054034, + "total": 9.559298000000004 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.00737584375, - "count": 96, - "max": 0.035369, - "min": 0.002613, - "total": 0.708081 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 6.313636363636364e-05, + "avg": 0.00034957954545454534, "count": 88, - "max": 0.001188, - "min": 2.4e-05, - "total": 0.005556 + "max": 0.000772, + "min": 0.00021, + "total": 0.030762999999999992 }, 
"sirius::Band::initialize_subspace": { - "avg": 2.572908, + "avg": 0.509673, "count": 1, - "max": 2.572908, - "min": 2.572908, - "total": 2.572908 + "max": 0.509673, + "min": 0.509673, + "total": 0.509673 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.32045575000000004, + "avg": 0.063274625, "count": 8, - "max": 0.32475, - "min": 0.31511, - "total": 2.5636460000000003 + "max": 0.066128, + "min": 0.061924, + "total": 0.506197 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.12017462500000001, + "avg": 0.000663375, "count": 8, - "max": 0.127923, - "min": 0.11547, - "total": 0.9613970000000001 + "max": 0.000786, + "min": 0.00061, + "total": 0.005307 }, "sirius::Band::residuals": { - "avg": 0.004130248062015509, - "count": 258, - "max": 0.018857, + "avg": 0.0007435000000000001, + "count": 246, + "max": 0.002121, "min": 0.0, - "total": 1.0656040000000013 + "total": 0.18290100000000004 }, "sirius::Band::residuals_aux": { - "avg": 0.0011572879581151832, - "count": 191, - "max": 0.002495, - "min": 0.000426, - "total": 0.221042 + "avg": 0.0007256569767441859, + "count": 172, + "max": 0.001099, + "min": 0.00054, + "total": 0.12481299999999998 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.003483008287292817, - "count": 362, - "max": 0.012212, - "min": 0.000297, - "total": 1.2608489999999997 + "avg": 0.0006048085714285713, + "count": 350, + "max": 0.001375, + "min": 0.000257, + "total": 0.21168299999999995 }, "sirius::Band::solve": { - "avg": 3.5220230000000003, + "avg": 0.9219838181818182, "count": 11, - "max": 8.787127, - "min": 1.707228, - "total": 38.742253000000005 + "max": 1.579798, + "min": 0.522039, + "total": 10.141822 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000627125, + "avg": 0.00120175, "count": 8, - "max": 0.000879, - "min": 0.000496, - "total": 0.005017 + "max": 0.001328, + "min": 0.001083, + "total": 0.009614 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00062275, + "avg": 0.0007345, "count": 8, - 
"max": 0.000874, - "min": 0.000492, - "total": 0.004982 + "max": 0.000884, + "min": 0.000628, + "total": 0.005876 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 3.749999999999995e-07, - "count": 184, + "avg": 3.333333333333328e-07, + "count": 216, "max": 1e-06, "min": 0.0, - "total": 6.89999999999999e-05 + "total": 7.19999999999999e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.00017598870056497173, - "count": 354, - "max": 0.000778, - "min": 0.000151, - "total": 0.062299999999999994 + "avg": 0.0001938846153846154, + "count": 104, + "max": 0.000461, + "min": 0.000149, + "total": 0.020164 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.0010383460451977424, - "count": 708, - "max": 0.002606, - "min": 5.4e-05, - "total": 0.7351490000000016 + "avg": 0.00018224779735682742, + "count": 908, + "max": 0.000549, + "min": 3.7e-05, + "total": 0.1654809999999993 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 0.0001786872246696025, + "count": 908, + "max": 0.000544, + "min": 3.5e-05, + "total": 0.16224799999999906 }, "sirius::Beta_projectors_base::prepare": { - "avg": 4.081521739130455e-06, - "count": 184, - "max": 0.000291, - "min": 0.0, - "total": 0.0007510000000000037 + "avg": 1.4375000000000002e-06, + "count": 16, + "max": 2e-06, + "min": 1e-06, + "total": 2.3000000000000003e-05 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.00281225, + "count": 8, + "max": 0.00327, + "min": 0.0026, + "total": 0.022498 }, "sirius::Broyden1::mix": { - "avg": 0.002233181818181818, + "avg": 0.0015293636363636364, "count": 11, - "max": 0.00549, - "min": 8.7e-05, - "total": 0.024565 - }, - "sirius::DFT_ground_state::compute_atomic_mag_mom": { - "avg": 0.018598333333333338, - "count": 12, - "max": 0.022814, - "min": 0.000261, - "total": 0.22318000000000005 + "max": 0.003396, + "min": 7.8e-05, + "total": 0.016823 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.07169, + "avg": 0.000469, "count": 1, - 
"max": 0.07169, - "min": 0.07169, - "total": 0.07169 + "max": 0.000469, + "min": 0.000469, + "total": 0.000469 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 47.444022, + "avg": 14.489491, "count": 1, - "max": 47.444022, - "min": 47.444022, - "total": 47.444022 + "max": 14.489491, + "min": 14.489491, + "total": 14.489491 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 4.311655454545454, + "avg": 1.3163536363636366, "count": 11, - "max": 9.585276, - "min": 2.497988, - "total": 47.42820999999999 + "max": 1.976731, + "min": 0.91111, + "total": 14.479890000000003 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.001891, + "avg": 0.0006253068181818181, "count": 88, - "max": 0.003122, - "min": 0.001729, - "total": 0.166408 + "max": 0.000918, + "min": 0.000542, + "total": 0.055027 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.02292370454545456, + "avg": 0.006448920454545455, "count": 88, - "max": 0.028856, - "min": 0.021154, - "total": 2.0172860000000012 + "max": 0.009003, + "min": 0.005731, + "total": 0.567505 }, "sirius::Density::augment": { - "avg": 0.3311835454545454, + "avg": 0.2615190909090909, "count": 11, - "max": 0.341437, - "min": 0.317278, - "total": 3.643019 + "max": 0.30595, + "min": 0.248637, + "total": 2.87671 + }, + "sirius::Density::compute_atomic_mag_mom": { + "avg": 0.000247, + "count": 1, + "max": 0.000247, + "min": 0.000247, + "total": 0.000247 }, "sirius::Density::generate": { - "avg": 0.5373566363636364, + "avg": 0.31956199999999996, "count": 11, - "max": 0.548429, - "min": 0.517462, - "total": 5.910923 + "max": 0.377818, + "min": 0.304907, + "total": 3.515182 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.080838, + "avg": 0.001225, "count": 1, - "max": 0.080838, - "min": 0.080838, - "total": 0.080838 + "max": 0.001225, + "min": 0.001225, + "total": 0.001225 }, "sirius::Density::generate_rho_aug": { - "avg": 0.3305785454545455, + "avg": 0.26107309090909087, "count": 11, - 
"max": 0.340945, - "min": 0.316576, - "total": 3.6363640000000004 + "max": 0.305471, + "min": 0.248182, + "total": 2.8718039999999996 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.04276086363636364, + "avg": 0.04528045454545455, "count": 44, - "max": 0.109454, - "min": 0.019896, - "total": 1.881478 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.07429700000000002, - "count": 11, - "max": 0.077373, - "min": 0.072261, - "total": 0.8172670000000002 + "max": 0.135843, + "min": 0.020154, + "total": 1.99234 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.017933522727272732, + "avg": 0.01697027272727273, "count": 44, - "max": 0.021455, - "min": 0.016728, - "total": 0.7890750000000002 + "max": 0.020324, + "min": 0.015755, + "total": 0.7466920000000001 }, "sirius::Density::generate_valence": { - "avg": 0.5373504545454546, + "avg": 0.31955736363636367, "count": 11, - "max": 0.548419, - "min": 0.517458, - "total": 5.910855000000001 + "max": 0.377813, + "min": 0.304902, + "total": 3.5151310000000002 }, "sirius::Density::initial_density": { - "avg": 0.097798, + "avg": 0.006733, "count": 1, - "max": 0.097798, - "min": 0.097798, - "total": 0.097798 + "max": 0.006733, + "min": 0.006733, + "total": 0.006733 }, "sirius::Density::update": { - "avg": 0.081516, + "avg": 0.00125, + "count": 1, + "max": 0.00125, + "min": 0.00125, + "total": 0.00125 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.001319, + "count": 1, + "max": 0.001319, + "min": 0.001319, + "total": 0.001319 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.001033, + "count": 1, + "max": 0.001033, + "min": 0.001033, + "total": 0.001033 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.037395, + "count": 1, + "max": 0.037395, + "min": 0.037395, + "total": 0.037395 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.000529, "count": 1, - "max": 0.081516, - "min": 0.081516, - "total": 0.081516 + "max": 0.000529, + "min": 0.000529, + "total": 0.000529 + }, + 
"sirius::Force::calc_forces_us": { + "avg": 0.110845, + "count": 1, + "max": 0.110845, + "min": 0.110845, + "total": 0.110845 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.000876, + "count": 1, + "max": 0.000876, + "min": 0.000876, + "total": 0.000876 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.12780797744360894, - "count": 266, - "max": 0.209156, - "min": 0.006323, - "total": 33.99692199999998 + "avg": 0.03601203149606299, + "count": 254, + "max": 0.066747, + "min": 0.002386, + "total": 9.147056 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.004995909090909092, + "avg": 0.0038823409090909104, "count": 88, - "max": 0.006457, - "min": 0.00406, - "total": 0.43964000000000003 + "max": 0.006711, + "min": 0.003062, + "total": 0.3416460000000001 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.002526931818181818, + "avg": 0.0017623409090909094, "count": 88, - "max": 0.00377, - "min": 0.002025, - "total": 0.22236999999999998 - }, - "sirius::Hamiltonian::prepare": { - "avg": 0.0014954166666666666, - "count": 12, - "max": 0.001633, - "min": 0.001446, - "total": 0.017945 + "max": 0.002829, + "min": 0.001485, + "total": 0.15508600000000003 }, "sirius::K_point::K_point": { - "avg": 1e-06, + "avg": 8.749999999999999e-07, "count": 8, - "max": 4e-06, + "max": 3e-06, "min": 0.0, - "total": 8e-06 + "total": 6.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.000815875, + "avg": 0.0005183750000000001, "count": 8, - "max": 0.000964, - "min": 0.000719, - "total": 0.006527 + "max": 0.000613, + "min": 0.000459, + "total": 0.0041470000000000005 }, "sirius::K_point::initialize": { - "avg": 0.00312225, + "avg": 0.00203125, "count": 8, - "max": 0.003778, - "min": 0.0027, - "total": 0.024978 + "max": 0.002254, + "min": 0.001847, + "total": 0.01625 }, "sirius::K_point::update": { - "avg": 0.0014516249999999998, + "avg": 0.0014780000000000001, "count": 8, - "max": 0.00184, - "min": 0.001118, - "total": 0.011612999999999998 + "max": 0.001608, + "min": 
0.001355, + "total": 0.011824000000000001 }, "sirius::K_point_set::add_kpoint": { "avg": 3.875e-06, @@ -509,291 +607,354 @@ "total": 3.1e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.025387, + "avg": 0.016379, "count": 1, - "max": 0.025387, - "min": 0.025387, - "total": 0.025387 + "max": 0.016379, + "min": 0.016379, + "total": 0.016379 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.0003224545454545455, + "avg": 0.0004023636363636364, "count": 11, - "max": 0.000404, - "min": 0.000272, - "total": 0.0035470000000000002 + "max": 0.000554, + "min": 0.000332, + "total": 0.004426 }, "sirius::K_point_set::initialize": { - "avg": 0.025335, + "avg": 0.016331, "count": 1, - "max": 0.025335, - "min": 0.025335, - "total": 0.025335 + "max": 0.016331, + "min": 0.016331, + "total": 0.016331 }, "sirius::K_point_set::sync_band_energies": { - "avg": 1.4363636363636365e-05, + "avg": 6.636363636363637e-06, "count": 11, - "max": 2.5e-05, - "min": 9e-06, - "total": 0.00015800000000000002 + "max": 1.1e-05, + "min": 5e-06, + "total": 7.3e-05 }, "sirius::Local_operator::apply_h": { - "avg": 0.11539625939849625, - "count": 266, - "max": 0.18909, - "min": 0.00528, - "total": 30.695405 + "avg": 0.03430340551181104, + "count": 254, + "max": 0.063897, + "min": 0.001629, + "total": 8.713065000000004 }, "sirius::Local_operator::prepare": { - "avg": 0.0009180925925925937, + "avg": 0.000186925925925926, "count": 108, - "max": 0.009642, - "min": 3e-05, - "total": 0.09915400000000012 + "max": 0.001678, + "min": 2.9e-05, + "total": 0.020188000000000008 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 1.041666666666667e-06, + "avg": 1.4583333333333333e-06, "count": 24, - "max": 2e-06, + "max": 3e-06, "min": 0.0, - "total": 2.5000000000000008e-05 + "total": 3.5e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.001182469454887217, - "count": 2128, - "max": 0.003615, - "min": 8.4e-05, - "total": 2.516294999999998 + "avg": 0.00013624803149606327, + "count": 
2032, + "max": 0.000643, + "min": 5.9e-05, + "total": 0.27685600000000055 }, "sirius::Periodic_function::add": { - "avg": 0.000125125, + "avg": 7.570833333333335e-05, "count": 24, - "max": 0.000193, - "min": 7.4e-05, - "total": 0.0030029999999999996 + "max": 0.000111, + "min": 5.9e-05, + "total": 0.0018170000000000003 }, "sirius::Periodic_function::inner": { - "avg": 8.086864406779668e-05, - "count": 236, - "max": 0.000265, - "min": 5.6e-05, - "total": 0.019085000000000015 + "avg": 9.876470588235298e-05, + "count": 238, + "max": 0.000207, + "min": 6.8e-05, + "total": 0.02350600000000001 }, "sirius::Periodic_function::integrate": { - "avg": 6.34888888888889e-05, + "avg": 9.026666666666667e-05, "count": 45, - "max": 9.8e-05, - "min": 5.3e-05, - "total": 0.0028570000000000006 + "max": 0.000152, + "min": 7.1e-05, + "total": 0.0040620000000000005 }, "sirius::Potential::Potential": { - "avg": 0.097104, + "avg": 0.007487, "count": 1, - "max": 0.097104, - "min": 0.097104, - "total": 0.097104 + "max": 0.007487, + "min": 0.007487, + "total": 0.007487 }, "sirius::Potential::generate": { - "avg": 0.19138391666666665, + "avg": 0.06488016666666666, "count": 12, - "max": 0.203727, - "min": 0.162085, - "total": 2.296607 + "max": 0.080047, + "min": 0.060996, + "total": 0.7785619999999999 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.08143266666666667, + "avg": 0.05363266666666666, "count": 12, - "max": 0.089545, - "min": 0.050223, - "total": 0.977192 + "max": 0.06737, + "min": 0.050951, + "total": 0.6435919999999999 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 5.833333333333333e-07, + "avg": 3.333333333333333e-07, "count": 12, "max": 1e-06, "min": 0.0, - "total": 6.999999999999999e-06 + "total": 4e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.094184, + "avg": 0.002306, "count": 1, - "max": 0.094184, - "min": 0.094184, - "total": 0.094184 + "max": 0.002306, + "min": 0.002306, + "total": 0.002306 }, 
"sirius::Potential::poisson": { - "avg": 0.0804695, + "avg": 0.0007354999999999999, "count": 12, - "max": 0.086803, - "min": 0.077009, - "total": 0.965634 + "max": 0.000907, + "min": 0.000678, + "total": 0.008825999999999999 }, "sirius::Potential::update": { - "avg": 0.094682, + "avg": 0.002328, "count": 1, - "max": 0.094682, - "min": 0.094682, - "total": 0.094682 + "max": 0.002328, + "min": 0.002328, + "total": 0.002328 }, "sirius::Potential::xc": { - "avg": 0.011908416666666666, + "avg": 0.007949416666666667, "count": 12, - "max": 0.016617, - "min": 0.008182, - "total": 0.142901 + "max": 0.010506, + "min": 0.006549, + "total": 0.095393 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.011901833333333334, + "avg": 0.007940083333333334, "count": 12, - "max": 0.016613, - "min": 0.008174, - "total": 0.142822 + "max": 0.010497, + "min": 0.006541, + "total": 0.095281 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.008717416666666667, + "avg": 0.002637083333333333, "count": 12, - "max": 0.012383, - "min": 0.005354, - "total": 0.104609 + "max": 0.00332, + "min": 0.00228, + "total": 0.03164499999999999 }, "sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.0009441666666666665, + "avg": 0.0008939999999999999, "count": 12, - "max": 0.001804, - "min": 0.000809, - "total": 0.011329999999999998 + "max": 0.001196, + "min": 0.000791, + "total": 0.010728 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.165609, + "avg": 0.1656765, "count": 2, - "max": 0.168675, - "min": 0.162543, - "total": 0.331218 + "max": 0.168126, + "min": 0.163227, + "total": 0.331353 }, "sirius::Radial_integrals|aug": { - "avg": 1.113209, + "avg": 1.0346615, "count": 2, - "max": 1.444383, - "min": 0.782035, - "total": 2.226418 + "max": 1.313866, + "min": 0.755457, + "total": 2.069323 }, "sirius::Radial_integrals|beta": { - "avg": 0.1519335, + "avg": 0.160332, "count": 2, - "max": 0.158034, - "min": 0.145833, - "total": 0.303867 + "max": 0.170917, + "min": 0.149747, + "total": 
0.320664 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.0369955, + "avg": 0.033044500000000004, "count": 2, - "max": 0.038306, - "min": 0.035685, - "total": 0.073991 + "max": 0.037255, + "min": 0.028834, + "total": 0.06608900000000001 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.032167, + "avg": 0.029946, "count": 1, - "max": 0.032167, - "min": 0.032167, - "total": 0.032167 + "max": 0.029946, + "min": 0.029946, + "total": 0.029946 }, "sirius::Radial_integrals|vloc": { - "avg": 0.121786, + "avg": 0.110066, "count": 2, - "max": 0.132353, - "min": 0.111219, - "total": 0.243572 + "max": 0.122045, + "min": 0.098087, + "total": 0.220132 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.003549, + "avg": 0.002049, "count": 1, - "max": 0.003549, - "min": 0.003549, - "total": 0.003549 + "max": 0.002049, + "min": 0.002049, + "total": 0.002049 }, "sirius::Simulation_context::init_comm": { - "avg": 0.00053, + "avg": 0.000247, "count": 1, - "max": 0.00053, - "min": 0.00053, - "total": 0.00053 + "max": 0.000247, + "min": 0.000247, + "total": 0.000247 }, "sirius::Simulation_context::init_fft": { - "avg": 0.026966, + "avg": 0.019825, "count": 1, - "max": 0.026966, - "min": 0.026966, - "total": 0.026966 + "max": 0.019825, + "min": 0.019825, + "total": 0.019825 }, "sirius::Simulation_context::initialize": { - "avg": 3.66799, + "avg": 3.341646, "count": 1, - "max": 3.66799, - "min": 3.66799, - "total": 3.66799 + "max": 3.341646, + "min": 3.341646, + "total": 3.341646 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.07993033333333334, - "count": 3, - "max": 0.089083, - "min": 0.073711, - "total": 0.239791 + "avg": 0.0003191666666666667, + "count": 6, + "max": 0.000455, + "min": 0.000194, + "total": 0.0019150000000000003 }, "sirius::Simulation_context::update": { - "avg": 0.320064, + "avg": 0.189845, "count": 1, - "max": 0.320064, - "min": 0.320064, - "total": 0.320064 + "max": 0.189845, + "min": 0.189845, + "total": 
0.189845 }, "sirius::Simulation_parameters::import": { - "avg": 0.000626, + "avg": 0.000163, "count": 1, - "max": 0.000626, - "min": 0.000626, - "total": 0.000626 + "max": 0.000163, + "min": 0.000163, + "total": 0.000163 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.0032050607287449394, - "count": 247, - "max": 0.006948, - "min": 0.001362, - "total": 0.7916500000000001 + "avg": 0.0004667674418604651, + "count": 258, + "max": 0.001776, + "min": 0.000186, + "total": 0.12042599999999999 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 5.05e-05, + "avg": 3.5e-05, "count": 8, - "max": 6.3e-05, - "min": 4.5e-05, - "total": 0.000404 + "max": 3.8e-05, + "min": 3.2e-05, + "total": 0.00028 }, "sirius::Smooth_periodic_function|inner": { - "avg": 7.790036900368999e-05, - "count": 271, - "max": 0.000262, - "min": 5.4e-05, - "total": 0.021110999999999987 + "avg": 9.528467153284671e-05, + "count": 274, + "max": 0.000204, + "min": 6.4e-05, + "total": 0.026108 + }, + "sirius::Stress|ewald": { + "avg": 0.001355, + "count": 1, + "max": 0.001355, + "min": 0.001355, + "total": 0.001355 + }, + "sirius::Stress|har": { + "avg": 0.000496, + "count": 1, + "max": 0.000496, + "min": 0.000496, + "total": 0.000496 + }, + "sirius::Stress|kin": { + "avg": 0.002105, + "count": 1, + "max": 0.002105, + "min": 0.002105, + "total": 0.002105 + }, + "sirius::Stress|nonloc": { + "avg": 0.122158, + "count": 1, + "max": 0.122158, + "min": 0.122158, + "total": 0.122158 + }, + "sirius::Stress|us": { + "avg": 3.368923, + "count": 1, + "max": 3.368923, + "min": 3.368923, + "total": 3.368923 + }, + "sirius::Stress|us|gemm": { + "avg": 0.008486833333333334, + "count": 36, + "max": 0.011121, + "min": 0.007571, + "total": 0.305526 + }, + "sirius::Stress|us|phase_fac": { + "avg": 0.000117, + "count": 1, + "max": 0.000117, + "min": 0.000117, + "total": 0.000117 + }, + "sirius::Stress|us|prepare": { + "avg": 0.0001775, + "count": 36, + "max": 0.000238, + "min": 0.000136, + "total": 
0.00639 + }, + "sirius::Stress|vloc": { + "avg": 0.001063, + "count": 1, + "max": 0.001063, + "min": 0.001063, + "total": 0.001063 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.000243, + "avg": 0.000303, "count": 2, - "max": 0.000364, - "min": 0.000122, - "total": 0.000486 + "max": 0.000505, + "min": 0.000101, + "total": 0.000606 }, "sirius::Unit_cell::initialize": { - "avg": 0.107362, + "avg": 0.093033, "count": 1, - "max": 0.107362, - "min": 0.107362, - "total": 0.107362 + "max": 0.093033, + "min": 0.093033, + "total": 0.093033 }, "sirius::Unit_cell::update": { - "avg": 0.0002485, + "avg": 0.000308, "count": 2, - "max": 0.000371, - "min": 0.000126, - "total": 0.000497 + "max": 0.00051, + "min": 0.000106, + "total": 0.000616 } } } \ No newline at end of file diff --git a/verification/test11/sirius.json b/verification/test11/sirius.json index 257202486..c392c912f 100644 --- a/verification/test11/sirius.json +++ b/verification/test11/sirius.json @@ -5,7 +5,9 @@ "gen_evp_solver_type" : "lapack", "verbosity" : 1, "!print_checksum" : true, - "!verification" : 1 + "!verification" : 1, + "print_forces" : true, + "print_stress" : true }, "iterative_solver" : { "tolerance" : 1e-12, diff --git a/verification/test14/output_ref.json b/verification/test14/output_ref.json index 1a45c9111..25d534502 100644 --- a/verification/test14/output_ref.json +++ b/verification/test14/output_ref.json @@ -1,602 +1,600 @@ { "comm_world_size": 1, "counters": { - "band_evp_work_count": 1423.7594531250004, - "local_operator_num_applied": 4838 + "band_evp_work_count": 1414.311437500001, + "local_operator_num_applied": 4833 }, - "git_hash": "5c67ba315d742d282eadee8053b167c7aa93c0da", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "SrVO3", "converged": true, "core_leakage": 0.0, - "efermi": 0.4253347923821231, + "efermi": 0.42534053203583505, "energy": { "bxc": 0.0, "core_eval_sum": 0.0, "enuc": 0.0, 
- "eval_sum": -8.122419613198069, + "eval_sum": -8.122180272899788, "ewald": -114.22303279293868, - "exc": -30.179240604777675, - "kin": 62.853196030130725, - "total": -156.78011261870085, - "veff": -70.9756156433288, - "vha": 71.65791900120391, - "vxc": -31.573539892815504 + "exc": -30.17910198260357, + "kin": 62.85257159944002, + "total": -156.78011271750822, + "veff": -70.97475187233981, + "vha": 71.6566318455137, + "vxc": -31.572518253690674 }, "fft_coarse_grid": [30,30,30], - "fft_grid": [48,48,48], + "fft_grid": [50,50,50], + "forces": [ + [8.261391930692068e-16,7.979213901057464e-16,1.09720922186549e-15], + [-1.335754713697298e-15,-1.41240382872692e-15,-1.2961840554749794e-15], + [4.057768092968444e-15,-3.7328268949523387e-16,-2.994382366357319e-16], + [-1.8927079120050556e-16,4.061217543226888e-15,-2.2407185734217745e-16], + [-1.9922240148997131e-16,-3.063171538069509e-16,3.965517412798965e-15] + ], "mpi_grid": [1,1], "num_atoms": 5, "num_bands": 40, "num_fv_states": 40, "num_scf_iterations": 14, "omega": 382.708923702537, - "pw_cutoff": 20.0 + "pw_cutoff": 20.0, + "stress": [ + [-3.661902578716396e-05,1.755404897227949e-36,5.385290506494084e-28], + [1.755404897227949e-36,-3.6619025787162224e-05,-5.385290487333526e-28], + [5.385290506494084e-28,-5.385290487333526e-28,-3.6619025787033854e-05] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.002370188524590164, - "count": 122, - "max": 0.004386, - "min": 0.000802, - "total": 0.289163 - }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.002357762295081967, - "count": 122, - "max": 0.004373, - "min": 0.000792, - "total": 0.287647 + "Eigensolver_lapack|zheevr": { + "avg": 0.0020729495798319317, + "count": 119, + "max": 0.004606, + "min": 0.000923, + "total": 0.24668099999999987 + }, + "Eigensolver_lapack|zhegvx": { + "avg": 0.001070875, + "count": 64, + "max": 0.0013, + "min": 0.00084, + "total": 0.068536 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0251735, + 
"avg": 0.004295, "count": 2, - "max": 0.047072, - "min": 0.003275, - "total": 0.050347 + "max": 0.005307, + "min": 0.003283, + "total": 0.00859 }, "sddk::FFT3D::prepare": { - "avg": 5.250000000000003e-05, + "avg": 6.65769230769231e-05, "count": 156, - "max": 0.000224, - "min": 3.8e-05, - "total": 0.008190000000000005 + "max": 0.000182, + "min": 4.6e-05, + "total": 0.010386000000000005 }, "sddk::FFT3D::prepare|cpu": { - "avg": 4.961538461538462e-05, + "avg": 6.067307692307691e-05, "count": 156, - "max": 0.0002, - "min": 3.7e-05, - "total": 0.00774 + "max": 0.000157, + "min": 4.2e-05, + "total": 0.009464999999999998 }, "sddk::FFT3D::transform": { - "avg": 0.0008611176729062348, - "count": 11379, - "max": 0.006212, - "min": 0.000293, - "total": 9.798658000000046 + "avg": 0.0003629864237540519, + "count": 11417, + "max": 0.001799, + "min": 0.00022, + "total": 4.144216000000011 }, "sddk::FFT3D::transform_xy": { - "avg": 0.00017642015994375783, - "count": 11379, - "max": 0.001164, - "min": 0.000142, - "total": 2.0074850000000204 + "avg": 0.0002535826399229215, + "count": 11417, + "max": 0.001026, + "min": 0.000149, + "total": 2.8951529999999943 }, "sddk::FFT3D::transform_z": { - "avg": 0.0006790142367519128, - "count": 11379, - "max": 0.005625, - "min": 8.7e-05, - "total": 7.726503000000016 + "avg": 0.00010228343697994272, + "count": 11417, + "max": 0.000733, + "min": 4.9e-05, + "total": 1.167770000000006 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0006763644432726926, - "count": 11379, - "max": 0.005621, - "min": 8.4e-05, - "total": 7.696350999999969 + "avg": 9.926145222037341e-05, + "count": 11417, + "max": 0.000729, + "min": 4.6e-05, + "total": 1.1332680000000033 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0006735467967308227, - "count": 11379, - "max": 0.005613, - "min": 8.1e-05, - "total": 7.664289000000031 + "avg": 9.59594464395206e-05, + "count": 11417, + "max": 0.000707, + "min": 4.4e-05, + "total": 1.0955690000000067 }, 
"sddk::Gvec::find_gvec_shells": { - "avg": 0.0005994166666666666, + "avg": 0.0006025833333333333, "count": 12, - "max": 0.0022, - "min": 0.000167, - "total": 0.007193 + "max": 0.002196, + "min": 0.000229, + "total": 0.0072310000000000004 }, "sddk::Gvec::init": { - "avg": 0.0025315, + "avg": 0.0018433333333333338, "count": 6, - "max": 0.010437, - "min": 0.000596, - "total": 0.015189 - }, - "sddk::Wave_functions::transform": { - "avg": 0.000821447619047619, - "count": 315, - "max": 0.002828, - "min": 0.000186, - "total": 0.258756 - }, - "sddk::Wave_functions::transform|init": { - "avg": 7.077777777777786e-05, - "count": 315, - "max": 0.000897, - "min": 3e-06, - "total": 0.022295000000000023 + "max": 0.0074, + "min": 0.000496, + "total": 0.011060000000000002 }, "sddk::inner": { - "avg": 0.0003591113360323883, - "count": 494, - "max": 0.001355, - "min": 6e-06, - "total": 0.1774009999999998 + "avg": 0.0003592000000000005, + "count": 485, + "max": 0.001404, + "min": 7e-06, + "total": 0.17421200000000023 }, "sddk::inner|local": { - "avg": 0.0003553076923076919, - "count": 494, - "max": 0.00135, - "min": 4e-06, - "total": 0.1755219999999998 + "avg": 0.00035497938144329953, + "count": 485, + "max": 0.001395, + "min": 5e-06, + "total": 0.17216500000000026 }, "sddk::matrix_storage::matrix_storage": { - "avg": 1.1342105263157954e-06, + "avg": 2.4655263157894717e-05, "count": 380, - "max": 2.6e-05, + "max": 0.000197, "min": 0.0, - "total": 0.00043100000000000224 + "total": 0.009368999999999992 }, "sddk::matrix_storage::remap_backward": { - "avg": 1.4139784946236545e-06, - "count": 186, - "max": 7e-06, + "avg": 1.1256830601092874e-06, + "count": 183, + "max": 1.2e-05, "min": 0.0, - "total": 0.0002629999999999997 + "total": 0.0002059999999999996 }, "sddk::matrix_storage::remap_forward": { - "avg": 5.01219512195123e-06, - "count": 246, - "max": 1.1e-05, + "avg": 5.522633744855967e-06, + "count": 243, + "max": 1e-05, "min": 3e-06, - "total": 0.0012330000000000028 + "total": 
0.001342 }, "sddk::matrix_storage::set_num_extra": { - "avg": 1.368055555555562e-06, - "count": 432, + "avg": 1.3920187793427288e-06, + "count": 426, "max": 5e-06, "min": 0.0, - "total": 0.0005910000000000028 + "total": 0.0005930000000000025 }, "sddk::orthogonalize": { - "avg": 0.0018962786885245912, - "count": 122, - "max": 0.005574, - "min": 0.000317, - "total": 0.23134600000000013 + "avg": 0.0014190756302521008, + "count": 119, + "max": 0.004025, + "min": 0.000283, + "total": 0.16887 }, "sddk::orthogonalize|tmtrx": { - "avg": 3.04344262295082e-05, - "count": 122, - "max": 0.001006, - "min": 3e-06, - "total": 0.0037130000000000006 + "avg": 3.849579831932775e-05, + "count": 119, + "max": 0.000139, + "min": 2e-06, + "total": 0.004581000000000002 }, "sddk::orthogonalize|transform": { - "avg": 0.000352360655737705, - "count": 122, - "max": 0.00114, - "min": 2.1e-05, - "total": 0.042988000000000005 + "avg": 0.00020922689075630259, + "count": 119, + "max": 0.000636, + "min": 1.1e-05, + "total": 0.024898000000000007 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.011025, + "avg": 0.005904, "count": 1, - "max": 0.011025, - "min": 0.011025, - "total": 0.011025 + "max": 0.005904, + "min": 0.005904, + "total": 0.005904 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.002798666666666666, + "avg": 0.0006076333333333335, "count": 30, - "max": 0.003093, - "min": 0.002681, - "total": 0.08395999999999998 + "max": 0.000937, + "min": 0.000437, + "total": 0.018229000000000006 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.002957733333333333, + "avg": 0.0005701666666666666, "count": 30, - "max": 0.004053, - "min": 0.002704, - "total": 0.08873199999999999 + "max": 0.000978, + "min": 0.000456, + "total": 0.017105 + }, + "sddk::transform": { + "avg": 0.0005137741935483871, + "count": 310, + "max": 0.001931, + "min": 9.2e-05, + "total": 0.15927 + }, + "sddk::transform|init": { + "avg": 6.200967741935487e-05, + "count": 310, + "max": 0.000629, + "min": 2e-06, + 
"total": 0.01922300000000001 + }, + "sddk::transform|local": { + "avg": 0.00020281333333333325, + "count": 675, + "max": 0.001155, + "min": 3.8e-05, + "total": 0.13689899999999994 }, "sirius::Atom_type::init": { - "avg": 0.018783666666666667, + "avg": 0.018407000000000003, "count": 3, - "max": 0.026171, - "min": 0.01127, - "total": 0.056351 + "max": 0.023417, + "min": 0.011366, + "total": 0.055221000000000006 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.10550233333333332, + "avg": 0.10977566666666667, "count": 3, - "max": 0.121108, - "min": 0.08079, - "total": 0.316507 + "max": 0.123906, + "min": 0.086718, + "total": 0.32932700000000004 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { - "avg": 0.07568777777777777, + "avg": 0.10688222222222221, "count": 9, - "max": 0.095166, - "min": 0.044909, - "total": 0.68119 + "max": 0.139578, + "min": 0.059292, + "total": 0.9619399999999999 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { - "avg": 0.07565133333333335, + "avg": 0.09662777777777777, "count": 9, - "max": 0.095113, - "min": 0.044879, - "total": 0.6808620000000001 + "max": 0.131675, + "min": 0.050153, + "total": 0.8696499999999999 }, "sirius::Augmentation_operator_gvec_deriv|constructor": { - "avg": 0.068141, + "avg": 0.078111, "count": 1, - "max": 0.068141, - "min": 0.068141, - "total": 0.068141 + "max": 0.078111, + "min": 0.078111, + "total": 0.078111 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.15768818333333331, + "avg": 0.08960650000000005, "count": 60, - "max": 0.284976, - "min": 0.077302, - "total": 9.461291 + "max": 0.154788, + "min": 0.045334, + "total": 5.3763900000000024 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 2.28e-05, + "avg": 0.00017658333333333336, "count": 60, - "max": 5.6e-05, - "min": 1.9e-05, - "total": 0.0013679999999999999 + "max": 0.000499, + "min": 2.9e-05, + "total": 0.010595000000000002 }, 
"sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.0019399230769230776, - "count": 182, - "max": 0.004389, - "min": 0.000804, - "total": 0.3530660000000001 + "avg": 0.0017417486033519554, + "count": 179, + "max": 0.004613, + "min": 0.000843, + "total": 0.311773 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.15290441666666668, + "avg": 0.08382853333333336, "count": 60, - "max": 0.279141, - "min": 0.073204, - "total": 9.174265 + "max": 0.148855, + "min": 0.03901, + "total": 5.029712000000002 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.0009647333333333333, + "avg": 0.0005126166666666667, "count": 60, - "max": 0.002488, - "min": 0.000397, - "total": 0.057884 + "max": 0.001211, + "min": 0.000269, + "total": 0.030757000000000003 }, "sirius::Band::initialize_subspace": { - "avg": 1.300777, + "avg": 0.139387, "count": 1, - "max": 1.300777, - "min": 1.300777, - "total": 1.300777 + "max": 0.139387, + "min": 0.139387, + "total": 0.139387 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.324411, + "avg": 0.034651749999999995, "count": 4, - "max": 0.334046, - "min": 0.316697, - "total": 1.297644 + "max": 0.036031, + "min": 0.033099, + "total": 0.13860699999999998 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.24882200000000002, + "avg": 0.0012017500000000001, "count": 4, - "max": 0.254051, - "min": 0.244049, - "total": 0.9952880000000001 + "max": 0.0015790000000000001, + "min": 0.000963, + "total": 0.0048070000000000005 }, "sirius::Band::residuals": { - "avg": 0.0006809890109890111, - "count": 182, - "max": 0.00238, - "min": 0.0, - "total": 0.12394000000000004 + "avg": 0.0006326592178770951, + "count": 179, + "max": 0.001703, + "min": 1e-06, + "total": 0.11324600000000003 }, "sirius::Band::residuals_aux": { - "avg": 0.0003103100775193798, - "count": 129, - "max": 0.000744, - "min": 0.00021, - "total": 0.040029999999999996 + "avg": 0.00046181102362204737, + "count": 127, + "max": 0.000737, + 
"min": 0.000279, + "total": 0.058650000000000015 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.0005516639999999997, - "count": 250, - "max": 0.001846, - "min": 0.000188, - "total": 0.13791599999999993 + "avg": 0.0005722388663967614, + "count": 247, + "max": 0.001616, + "min": 0.000261, + "total": 0.14134300000000005 }, "sirius::Band::solve": { - "avg": 0.6329151333333333, + "avg": 0.3596037333333334, "count": 15, - "max": 1.0274, - "min": 0.320489, - "total": 9.493727 + "max": 0.55169, + "min": 0.186473, + "total": 5.394056000000001 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.002468, + "avg": 0.0025044999999999998, "count": 4, - "max": 0.002593, - "min": 0.002335, - "total": 0.009872 + "max": 0.002548, + "min": 0.002442, + "total": 0.010017999999999999 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00125, + "avg": 0.001238, "count": 4, - "max": 0.001326, - "min": 0.001123, - "total": 0.005 + "max": 0.001319, + "min": 0.001144, + "total": 0.004952 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 4.071428571428567e-07, + "avg": 4.642857142857137e-07, "count": 140, - "max": 3e-06, + "max": 1e-06, "min": 0.0, - "total": 5.6999999999999935e-05 + "total": 6.499999999999991e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.000584576923076923, + "avg": 0.0005957884615384617, "count": 52, - "max": 0.001223, - "min": 0.000478, - "total": 0.030397999999999998 + "max": 0.001444, + "min": 0.000466, + "total": 0.03098100000000001 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.0005744006622516557, - "count": 302, - "max": 0.003059, - "min": 7.8e-05, - "total": 0.17346900000000004 + "avg": 0.0005138160535117056, + "count": 299, + "max": 0.001146, + "min": 8.5e-05, + "total": 0.153631 }, "sirius::Beta_projectors_base::local_inner_aux": { - "avg": 0.0005689569536423835, - "count": 302, - "max": 0.003048, - "min": 7.4e-05, - "total": 0.1718249999999998 + "avg": 0.0005086153846153851, + "count": 299, + "max": 0.001136, + 
"min": 8.2e-05, + "total": 0.15207600000000016 }, "sirius::Beta_projectors_base::prepare": { - "avg": 2.375e-06, + "avg": 3.750000000000001e-06, "count": 8, - "max": 3e-06, - "min": 2e-06, - "total": 1.9e-05 + "max": 9e-06, + "min": 1e-06, + "total": 3.0000000000000008e-05 }, "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { - "avg": 0.00460975, + "avg": 0.0050565, "count": 4, - "max": 0.004976, - "min": 0.004311, - "total": 0.018439 + "max": 0.005551, + "min": 0.004537, + "total": 0.020226 }, "sirius::Broyden1::mix": { - "avg": 0.0026141999999999997, + "avg": 0.0016583333333333335, "count": 15, - "max": 0.004128, - "min": 0.000158, - "total": 0.039213 + "max": 0.003039, + "min": 6.4e-05, + "total": 0.024875 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.104297, + "avg": 0.000744, "count": 1, - "max": 0.104297, - "min": 0.104297, - "total": 0.104297 - }, - "sirius::DFT_ground_state::forces": { - "avg": 0.000133, - "count": 1, - "max": 0.000133, - "min": 0.000133, - "total": 0.000133 + "max": 0.000744, + "min": 0.000744, + "total": 0.000744 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 29.791178, + "avg": 9.08136, "count": 1, - "max": 29.791178, - "min": 29.791178, - "total": 29.791178 + "max": 9.08136, + "min": 9.08136, + "total": 9.08136 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 1.9856096000000003, + "avg": 0.6050102, "count": 15, - "max": 2.378996, - "min": 1.661843, - "total": 29.784144000000005 + "max": 0.803404, + "min": 0.432665, + "total": 9.075153 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0006967666666666668, + "avg": 0.0006914833333333334, "count": 60, - "max": 0.001628, - "min": 0.000553, - "total": 0.04180600000000001 + "max": 0.001048, + "min": 0.000533, + "total": 0.041489000000000005 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.020497966666666666, + "avg": 0.010557049999999995, "count": 60, - "max": 0.024534, - "min": 0.018727, - "total": 1.229878 + "max": 
0.012707, + "min": 0.008062, + "total": 0.6334229999999997 }, "sirius::Density::augment": { - "avg": 0.42234820000000006, + "avg": 0.11287146666666664, "count": 15, - "max": 0.43505, - "min": 0.398563, - "total": 6.335223000000001 + "max": 0.12134, + "min": 0.102006, + "total": 1.6930719999999997 }, "sirius::Density::generate": { - "avg": 0.5090758666666668, + "avg": 0.15852046666666667, "count": 15, - "max": 0.526711, - "min": 0.484681, - "total": 7.636138000000002 + "max": 0.168215, + "min": 0.143269, + "total": 2.377807 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.110244, + "avg": 0.001881, "count": 1, - "max": 0.110244, - "min": 0.110244, - "total": 0.110244 + "max": 0.001881, + "min": 0.001881, + "total": 0.001881 }, "sirius::Density::generate_rho_aug": { - "avg": 0.42214260000000003, + "avg": 0.11264720000000002, "count": 15, - "max": 0.434885, - "min": 0.398232, - "total": 6.332139000000001 + "max": 0.121181, + "min": 0.101738, + "total": 1.6897080000000002 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.02209686666666667, - "count": 45, - "max": 0.044024, - "min": 0.007562, - "total": 0.9943590000000001 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.1051554888888889, + "avg": 0.02311273333333334, "count": 45, - "max": 0.110706, - "min": 0.100183, - "total": 4.731997000000001 + "max": 0.054058, + "min": 0.005025, + "total": 1.0400730000000002 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.00577891111111111, + "avg": 0.005350622222222222, "count": 45, - "max": 0.008389, - "min": 0.002943, - "total": 0.260051 + "max": 0.007828, + "min": 0.002113, + "total": 0.240778 }, "sirius::Density::generate_valence": { - "avg": 0.5090707333333333, + "avg": 0.15851506666666668, "count": 15, - "max": 0.526706, - "min": 0.48467, - "total": 7.636061 + "max": 0.16821, + "min": 0.143264, + "total": 2.377726 }, "sirius::Density::initial_density": { - "avg": 0.112824, + "avg": 0.002954, "count": 1, - "max": 
0.112824, - "min": 0.112824, - "total": 0.112824 + "max": 0.002954, + "min": 0.002954, + "total": 0.002954 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.0030039333333333326, + "avg": 0.0040214, "count": 15, - "max": 0.004971, - "min": 0.002782, - "total": 0.04505899999999999 + "max": 0.00527, + "min": 0.003169, + "total": 0.060321 }, "sirius::Density::update": { - "avg": 0.110312, + "avg": 0.001905, "count": 1, - "max": 0.110312, - "min": 0.110312, - "total": 0.110312 + "max": 0.001905, + "min": 0.001905, + "total": 0.001905 }, "sirius::Field4D::symmetrize": { - "avg": 0.012365866666666666, + "avg": 0.008377266666666668, "count": 30, - "max": 0.014701, - "min": 0.010799, - "total": 0.370976 + "max": 0.01035, + "min": 0.006011, + "total": 0.25131800000000004 }, "sirius::Force::calc_forces_core": { - "avg": 0.006956, + "avg": 0.004052, "count": 1, - "max": 0.006956, - "min": 0.006956, - "total": 0.006956 + "max": 0.004052, + "min": 0.004052, + "total": 0.004052 }, "sirius::Force::calc_forces_ewald": { - "avg": 1.115603, + "avg": 0.003123, "count": 1, - "max": 1.115603, - "min": 1.115603, - "total": 1.115603 + "max": 0.003123, + "min": 0.003123, + "total": 0.003123 }, "sirius::Force::calc_forces_nonloc": { - "avg": 0.021117, + "avg": 0.018517, "count": 1, - "max": 0.021117, - "min": 0.021117, - "total": 0.021117 + "max": 0.018517, + "min": 0.018517, + "total": 0.018517 }, "sirius::Force::calc_forces_scf_corr": { - "avg": 0.002268, + "avg": 0.002385, "count": 1, - "max": 0.002268, - "min": 0.002268, - "total": 0.002268 + "max": 0.002385, + "min": 0.002385, + "total": 0.002385 }, "sirius::Force::calc_forces_us": { - "avg": 0.989139, + "avg": 0.052276, "count": 1, - "max": 0.989139, - "min": 0.989139, - "total": 0.989139 + "max": 0.052276, + "min": 0.052276, + "total": 0.052276 }, "sirius::Force::calc_forces_vloc": { - "avg": 0.002372, + "avg": 0.002545, "count": 1, - "max": 0.002372, - "min": 0.002372, - "total": 0.002372 + "max": 0.002545, + "min": 
0.002545, + "total": 0.002545 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.04585662365591399, - "count": 186, - "max": 0.076753, - "min": 0.001939, - "total": 8.529332000000002 + "avg": 0.02371593989071038, + "count": 183, + "max": 0.047346, + "min": 0.001042, + "total": 4.340017 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.002324066666666666, + "avg": 0.0024239833333333334, "count": 60, - "max": 0.00415, - "min": 0.002006, - "total": 0.13944399999999996 + "max": 0.003047, + "min": 0.001518, + "total": 0.145439 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.0023635833333333326, + "avg": 0.002440433333333333, "count": 60, - "max": 0.0036, - "min": 0.001965, - "total": 0.14181499999999994 - }, - "sirius::Hamiltonian::prepare": { - "avg": 1.9375000000000003e-05, - "count": 16, - "max": 2.2e-05, - "min": 1.7e-05, - "total": 0.00031000000000000005 + "max": 0.003184, + "min": 0.001486, + "total": 0.14642599999999997 }, "sirius::K_point::K_point": { "avg": 1.7499999999999998e-06, @@ -606,445 +604,445 @@ "total": 6.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.0007779999999999999, + "avg": 0.000526, "count": 4, - "max": 0.00081, - "min": 0.000746, - "total": 0.0031119999999999997 + "max": 0.000552, + "min": 0.000515, + "total": 0.002104 }, "sirius::K_point::initialize": { - "avg": 0.00386125, + "avg": 0.0034084999999999996, "count": 4, - "max": 0.003967, - "min": 0.003727, - "total": 0.015445 + "max": 0.003531, + "min": 0.003296, + "total": 0.013633999999999999 }, "sirius::K_point::update": { - "avg": 0.0029109999999999995, + "avg": 0.0028345, "count": 4, - "max": 0.00306, - "min": 0.002719, - "total": 0.011643999999999998 + "max": 0.002899, + "min": 0.002736, + "total": 0.011338 }, "sirius::K_point_set::add_kpoint": { - "avg": 8e-06, + "avg": 5.2500000000000006e-06, "count": 4, - "max": 2.3e-05, + "max": 1.4e-05, "min": 2e-06, - "total": 3.2e-05 + "total": 2.1000000000000002e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 
0.022573, + "avg": 0.023765, "count": 1, - "max": 0.022573, - "min": 0.022573, - "total": 0.022573 + "max": 0.023765, + "min": 0.023765, + "total": 0.023765 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.0003133333333333333, + "avg": 0.000444, "count": 15, - "max": 0.00038, - "min": 0.000264, - "total": 0.0047 + "max": 0.000617, + "min": 0.000318, + "total": 0.00666 }, "sirius::K_point_set::initialize": { - "avg": 0.015876, + "avg": 0.013739, "count": 1, - "max": 0.015876, - "min": 0.015876, - "total": 0.015876 + "max": 0.013739, + "min": 0.013739, + "total": 0.013739 }, "sirius::K_point_set::sync_band_energies": { - "avg": 9.999999999999999e-06, + "avg": 7.133333333333332e-06, "count": 15, - "max": 1.2e-05, - "min": 9e-06, - "total": 0.00015 + "max": 1.4e-05, + "min": 5e-06, + "total": 0.00010699999999999997 }, "sirius::Local_operator::apply_h": { - "avg": 0.04404544623655914, - "count": 186, - "max": 0.07426, - "min": 0.00161, - "total": 8.192453 + "avg": 0.022203027322404367, + "count": 183, + "max": 0.044545, + "min": 0.00072, + "total": 4.063153999999999 }, "sirius::Local_operator::prepare": { - "avg": 0.00036896249999999987, + "avg": 0.00015042499999999996, "count": 80, - "max": 0.00301, - "min": 3.3e-05, - "total": 0.029516999999999988 + "max": 0.000705, + "min": 3.6e-05, + "total": 0.012033999999999996 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 1.15625e-06, + "avg": 1.21875e-06, "count": 32, "max": 2e-06, "min": 0.0, - "total": 3.7e-05 + "total": 3.9e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.0005587311827956991, - "count": 372, - "max": 0.001336, - "min": 9.1e-05, - "total": 0.20784800000000006 + "avg": 0.00036323770491803267, + "count": 366, + "max": 0.000818, + "min": 0.000102, + "total": 0.13294499999999995 }, "sirius::Periodic_function::add": { - "avg": 0.00015128125, + "avg": 9.787500000000001e-05, "count": 32, - "max": 0.000246, - "min": 7.3e-05, - "total": 0.004841 + "max": 0.000152, + "min": 
6.1e-05, + "total": 0.0031320000000000002 }, "sirius::Periodic_function::inner": { - "avg": 0.00012364341085271315, + "avg": 9.834883720930233e-05, "count": 129, - "max": 0.000281, - "min": 5.6e-05, - "total": 0.015949999999999995 + "max": 0.000156, + "min": 6.7e-05, + "total": 0.012687 }, "sirius::Periodic_function::integrate": { - "avg": 6.36875e-05, + "avg": 8.731249999999999e-05, "count": 16, - "max": 0.000123, - "min": 5.5e-05, - "total": 0.001019 + "max": 0.00014, + "min": 6.3e-05, + "total": 0.0013969999999999998 }, "sirius::Potential::Potential": { - "avg": 0.116358, + "avg": 0.009048, "count": 1, - "max": 0.116358, - "min": 0.116358, - "total": 0.116358 + "max": 0.009048, + "min": 0.009048, + "total": 0.009048 }, "sirius::Potential::generate": { - "avg": 0.80199825, + "avg": 0.060300375, "count": 16, - "max": 0.824818, - "min": 0.792934, - "total": 12.831972 + "max": 0.066258, + "min": 0.053194, + "total": 0.964806 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.6229333750000001, + "avg": 0.015021062499999998, "count": 16, - "max": 0.638549, - "min": 0.616761, - "total": 9.966934000000002 + "max": 0.016767, + "min": 0.013879, + "total": 0.24033699999999997 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 1.6875000000000004e-06, + "avg": 5.625e-07, "count": 16, - "max": 1.9e-05, + "max": 1e-06, "min": 0.0, - "total": 2.7000000000000006e-05 + "total": 9e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.111105, + "avg": 0.003215, "count": 1, - "max": 0.111105, - "min": 0.111105, - "total": 0.111105 + "max": 0.003215, + "min": 0.003215, + "total": 0.003215 }, "sirius::Potential::poisson": { - "avg": 0.11027062500000001, + "avg": 0.001172375, "count": 16, - "max": 0.111971, - "min": 0.105733, - "total": 1.7643300000000002 + "max": 0.001364, + "min": 0.001045, + "total": 0.018758 }, "sirius::Potential::update": { - "avg": 0.11117, + "avg": 0.003243, "count": 1, - "max": 0.11117, - "min": 0.11117, - "total": 
0.11117 + "max": 0.003243, + "min": 0.003243, + "total": 0.003243 }, "sirius::Potential::xc": { - "avg": 0.063236, + "avg": 0.04284175, "count": 16, - "max": 0.073464, - "min": 0.060567, - "total": 1.011776 + "max": 0.048661, + "min": 0.036543, + "total": 0.685468 }, "sirius::Potential::xc_rg_nonmagnetic": { - "avg": 0.0632266875, + "avg": 0.042833875, "count": 16, - "max": 0.073456, - "min": 0.06056, - "total": 1.011627 + "max": 0.048653, + "min": 0.036537, + "total": 0.685342 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.2116515, + "avg": 0.2240315, "count": 2, - "max": 0.218407, - "min": 0.204896, - "total": 0.423303 + "max": 0.232965, + "min": 0.215098, + "total": 0.448063 }, "sirius::Radial_integrals|aug": { - "avg": 1.0823675, + "avg": 1.1045675, "count": 2, - "max": 1.213125, - "min": 0.95161, - "total": 2.164735 + "max": 1.231836, + "min": 0.977299, + "total": 2.209135 }, "sirius::Radial_integrals|beta": { - "avg": 0.2557785, + "avg": 0.2469945, "count": 2, - "max": 0.25862, - "min": 0.252937, - "total": 0.511557 + "max": 0.257468, + "min": 0.236521, + "total": 0.493989 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.050627500000000006, + "avg": 0.0532745, "count": 2, - "max": 0.0517, - "min": 0.049555, - "total": 0.10125500000000001 + "max": 0.060065, + "min": 0.046484, + "total": 0.106549 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.047065, + "avg": 0.044795, "count": 1, - "max": 0.047065, - "min": 0.047065, - "total": 0.047065 + "max": 0.044795, + "min": 0.044795, + "total": 0.044795 }, "sirius::Radial_integrals|vloc": { - "avg": 0.160642, + "avg": 0.168923, "count": 2, - "max": 0.173382, - "min": 0.147902, - "total": 0.321284 + "max": 0.173776, + "min": 0.16407, + "total": 0.337846 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.001131, + "avg": 0.001044, "count": 1, - "max": 0.001131, - "min": 0.001131, - "total": 0.001131 + "max": 0.001044, + "min": 0.001044, + "total": 0.001044 }, 
"sirius::Simulation_context::init_comm": { - "avg": 0.002462, + "avg": 0.00026, "count": 1, - "max": 0.002462, - "min": 0.002462, - "total": 0.002462 + "max": 0.00026, + "min": 0.00026, + "total": 0.00026 }, "sirius::Simulation_context::init_fft": { - "avg": 0.077897, + "avg": 0.024287, "count": 1, - "max": 0.077897, - "min": 0.077897, - "total": 0.077897 + "max": 0.024287, + "min": 0.024287, + "total": 0.024287 }, "sirius::Simulation_context::initialize": { - "avg": 4.167044, + "avg": 4.083522, "count": 1, - "max": 4.167044, - "min": 4.167044, - "total": 4.167044 + "max": 4.083522, + "min": 4.083522, + "total": 4.083522 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.10502399999999999, + "avg": 0.0009521666666666666, "count": 6, - "max": 0.107553, - "min": 0.103457, - "total": 0.6301439999999999 + "max": 0.001208, + "min": 0.000658, + "total": 0.005712999999999999 }, "sirius::Simulation_context::update": { - "avg": 0.436959, + "avg": 0.346468, "count": 1, - "max": 0.436959, - "min": 0.436959, - "total": 0.436959 + "max": 0.346468, + "min": 0.346468, + "total": 0.346468 }, "sirius::Simulation_parameters::import": { - "avg": 0.000244, + "avg": 0.000188, "count": 1, - "max": 0.000244, - "min": 0.000244, - "total": 0.000244 + "max": 0.000188, + "min": 0.000188, + "total": 0.000188 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.00404226, - "count": 250, - "max": 0.006737, - "min": 0.00139, - "total": 1.010565 + "avg": 0.0008648221476510064, + "count": 298, + "max": 0.001933, + "min": 0.00029, + "total": 0.2577169999999999 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 8.5e-05, + "avg": 5.4e-05, "count": 2, - "max": 9.4e-05, - "min": 7.6e-05, - "total": 0.00017 + "max": 5.7e-05, + "min": 5.1e-05, + "total": 0.000108 }, "sirius::Smooth_periodic_function|inner": { - "avg": 0.00011614124293785306, + "avg": 9.334463276836161e-05, "count": 177, - "max": 0.000264, - "min": 5.5e-05, - "total": 0.020556999999999992 + 
"max": 0.000151, + "min": 6.5e-05, + "total": 0.016522000000000005 }, "sirius::Stress|ewald": { - "avg": 0.004984, + "avg": 0.00328, "count": 1, - "max": 0.004984, - "min": 0.004984, - "total": 0.004984 + "max": 0.00328, + "min": 0.00328, + "total": 0.00328 }, "sirius::Stress|har": { - "avg": 0.002255, + "avg": 0.000717, "count": 1, - "max": 0.002255, - "min": 0.002255, - "total": 0.002255 + "max": 0.000717, + "min": 0.000717, + "total": 0.000717 }, "sirius::Stress|kin": { - "avg": 0.001272, + "avg": 0.001423, "count": 1, - "max": 0.001272, - "min": 0.001272, - "total": 0.001272 + "max": 0.001423, + "min": 0.001423, + "total": 0.001423 }, "sirius::Stress|nonloc": { - "avg": 0.077329, + "avg": 0.071045, "count": 1, - "max": 0.077329, - "min": 0.077329, - "total": 0.077329 + "max": 0.071045, + "min": 0.071045, + "total": 0.071045 }, "sirius::Stress|us": { - "avg": 1.219946, + "avg": 1.155109, "count": 1, - "max": 1.219946, - "min": 1.219946, - "total": 1.219946 + "max": 1.155109, + "min": 1.155109, + "total": 1.155109 }, "sirius::Stress|us|gemm": { - "avg": 0.004420148148148146, + "avg": 0.003584851851851852, "count": 27, - "max": 0.008265, - "min": 0.003713, - "total": 0.11934399999999995 + "max": 0.010503, + "min": 0.002513, + "total": 0.096791 }, "sirius::Stress|us|phase_fac": { - "avg": 0.10425466666666668, + "avg": 0.000245, "count": 3, - "max": 0.105343, - "min": 0.102221, - "total": 0.31276400000000004 + "max": 0.000268, + "min": 0.00022, + "total": 0.000735 }, "sirius::Stress|us|prepare": { - "avg": 0.0003422962962962964, + "avg": 0.00027562962962962963, "count": 27, - "max": 0.000597, - "min": 0.000217, - "total": 0.009242000000000002 + "max": 0.000616, + "min": 0.000176, + "total": 0.007442000000000001 }, "sirius::Stress|vloc": { - "avg": 0.213278, + "avg": 0.003903, "count": 1, - "max": 0.213278, - "min": 0.213278, - "total": 0.213278 + "max": 0.003903, + "min": 0.003903, + "total": 0.003903 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 
0.00042, + "avg": 0.000269, "count": 2, - "max": 0.000682, - "min": 0.000158, - "total": 0.00084 + "max": 0.000405, + "min": 0.000133, + "total": 0.000538 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.008898, + "avg": 0.010546, "count": 2, - "max": 0.011654, - "min": 0.006142, - "total": 0.017796 + "max": 0.01074, + "min": 0.010352, + "total": 0.021092 }, "sirius::Unit_cell::initialize": { - "avg": 0.068753, + "avg": 0.066021, "count": 1, - "max": 0.068753, - "min": 0.068753, - "total": 0.068753 + "max": 0.066021, + "min": 0.066021, + "total": 0.066021 }, "sirius::Unit_cell::update": { - "avg": 0.0093315, + "avg": 0.010833, "count": 2, - "max": 0.012346, - "min": 0.006317, - "total": 0.018663 + "max": 0.010899, + "min": 0.010767, + "total": 0.021666 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.008812, + "avg": 0.0105135, "count": 2, - "max": 0.011602, - "min": 0.006022, - "total": 0.017624 + "max": 0.010701, + "min": 0.010326, + "total": 0.021027 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.00808, + "avg": 0.010283, "count": 2, - "max": 0.010569, - "min": 0.005591, - "total": 0.01616 + "max": 0.01052, + "min": 0.010046, + "total": 0.020566 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 0.00045349999999999996, + "avg": 4.4e-05, "count": 2, - "max": 0.000859, - "min": 4.8e-05, - "total": 0.0009069999999999999 + "max": 5.8e-05, + "min": 3e-05, + "total": 8.8e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.00021999999999999998, + "avg": 0.0001495, "count": 2, - "max": 0.000314, - "min": 0.000126, - "total": 0.00043999999999999996 + "max": 0.00018, + "min": 0.000119, + "total": 0.000299 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 3.6e-05, + "avg": 2.9e-05, "count": 2, - "max": 4.4e-05, - "min": 2.8e-05, - "total": 7.2e-05 + "max": 3.3e-05, + "min": 2.5e-05, + "total": 5.8e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 
0.012359966666666668, + "avg": 0.008372133333333334, "count": 30, - "max": 0.014692, - "min": 0.010794, - "total": 0.37079900000000005 + "max": 0.010346, + "min": 0.006007, + "total": 0.251164 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.006467766666666667, + "avg": 0.0071126333333333324, "count": 30, - "max": 0.007735, - "min": 0.005221, - "total": 0.194033 + "max": 0.009265, + "min": 0.005034, + "total": 0.21337899999999999 + }, + "sirius::divergence": { + "avg": 0.0058528750000000004, + "count": 16, + "max": 0.00645, + "min": 0.00483, + "total": 0.09364600000000001 }, "sirius::dot": { - "avg": 0.00094571875, - "count": 32, - "max": 0.001097, - "min": 0.000772, - "total": 0.030263 + "avg": 0.0012716875000000002, + "count": 16, + "max": 0.001482, + "min": 0.001111, + "total": 0.020347000000000004 }, "sirius::gradient": { - "avg": 0.002271, - "count": 33, - "max": 0.003175, - "min": 0.000311, - "total": 0.074943 - }, - "sirius::laplacian": { - "avg": 0.0009685, - "count": 16, - "max": 0.001074, - "min": 0.000911, - "total": 0.015496 + "avg": 0.001985470588235294, + "count": 17, + "max": 0.003127, + "min": 0.000431, + "total": 0.033753 } } } \ No newline at end of file diff --git a/verification/test15/F.pz-n-kjpaw_psl.0.1.UPF.json b/verification/test15/F.pz-n-kjpaw_psl.0.1.UPF.json new file mode 100644 index 000000000..4349dc374 --- /dev/null +++ b/verification/test15/F.pz-n-kjpaw_psl.0.1.UPF.json @@ -0,0 +1,277 @@ +{ + "pseudo_potential": { + "radial_grid": [ + 0.0001013202183949463, 0.0001025946698521136, 0.0001038851519351758, 0.0001051918662845838, 0.0001065150170771133, 0.0001078548110577677, 0.0001092114575720821, 0.0001105851685988346, 0.0001119761587831678, 0.0001133846454701283, 0.0001148108487386263, 0.0001162549914358243, 0.0001177172992119567, 0.0001191980005555885, 0.0001206973268293175, 0.0001222155123059243, 0.0001237527942049781, 0.0001253094127299036, 0.000126885611105512, 0.0001284816356160064, 0.0001300977356434638, 
0.0001317341637068014, 0.0001333911755012334, 0.0001350690299382239, 0.0001367679891859423, 0.0001384883187102277, 0.0001402302873160683, 0.0001419941671896027, 0.0001437802339406498, 0.0001455887666457733, 0.0001474200478918879, 0.0001492743638204141, 0.0001511520041719882, 0.0001530532623317352, 0.000154978435375111, 0.0001569278241143202, 0.0001589017331453194, 0.0001609004708954106, 0.0001629243496714337, 0.0001649736857085652, 0.0001670487992197303, 0.0001691500144456368, 0.0001712776597054389, 0.0001734320674480369, 0.0001756135743040241, 0.0001778225211382849, 0.0001800592531032563, 0.0001823241196928584, 0.0001846174747971038, 0.0001869396767573935, 0.0001892910884225082, 0.0001916720772053047, 0.0001940830151401244, 0.0001965242789409245, 0.0001989962500601411, 0.0002014993147482906, 0.000204033864114323, 0.0002066002941867334, 0.0002091990059754419, 0.000211830405534453, 0.000214494904025301, 0.0002171929177812963, 0.0002199248683725774, 0.0002226911826719821, 0.0002254922929217482, 0.000228328636801051, 0.0002312006574943921, 0.0002341088037608482, 0.0002370535300041898, 0.0002400352963438838, 0.0002430545686869872, 0.0002461118188009463, 0.0002492075243873115, 0.0002523421691563785, 0.0002555162429027707, 0.0002587302415819682, 0.0002619846673878036, 0.0002652800288309304, 0.0002686168408182782, 0.0002719956247335092, 0.0002754169085184843, 0.0002788812267557554, 0.0002823891207520954, 0.0002859411386230778, 0.000289537835378722, 0.0002931797730102135, 0.0002968675205777167, 0.0003006016542992918, 0.0003043827576409298, 0.000308211421407721, 0.0003120882438361681, 0.000316013830687663, 0.0003199887953431381, 0.0003240137588989075, 0.0003280893502637163, 0.0003322162062570067, 0.0003363949717084239, 0.0003406262995585714, 0.0003449108509610343, 0.0003492492953856867, 0.0003536423107232963, 0.0003580905833914474, 0.0003625948084417942, 0.0003671556896686647, 0.0003717739397190302, 0.0003764502802038568, 0.0003811854418108594, 0.0003859801644186726, 
0.0003908351972124587, 0.00039575129880097, 0.0004007292373350812, 0.0004057697906278159, 0.0004108737462758813, 0.0004160419017827311, 0.0004212750646831787, 0.0004265740526695737, 0.00043193969371957, 0.0004373728262254981, 0.0004428742991253656, 0.0004484449720355065, 0.0004540857153848963, 0.0004597974105511597, 0.0004655809499982866, 0.0004714372374160822, 0.0004773671878613711, 0.0004833717279009743, 0.0004894517957564896, 0.0004956083414508891, 0.0005018423269569629, 0.0005081547263476303, 0.0005145465259481384, 0.000521018724490179, 0.0005275723332679419, 0.0005342083762961317, 0.0005409278904699737, 0.000547731925727228, 0.0005546215452122464, 0.0005615978254420905, 0.000568661856474739, 0.0005758147420794139, 0.0005830575999090427, 0.0005903915616748972, 0.0005978177733234244, 0.0006053373952153028, 0.0006129516023067524, 0.0006206615843331221, 0.0006284685459947898, 0.0006363737071453983, 0.0006443783029824601, 0.0006524835842403613, 0.0006606908173857882, 0.0006690012848156176, 0.0006774162850572931, 0.0006859371329717224, 0.0006945651599587291, 0.000703301714165083, 0.0007121481606951537, 0.0007211058818242097, 0.0007301762772144014, 0.0007393607641334637, 0.000748660777676163, 0.0007580777709885365, 0.0007676132154949481, 0.0007772686011280012, 0.0007870454365613473, 0.000796945249445411, 0.000806969586646099, 0.0008171200144864929, 0.0008273981189915931, 0.0008378055061361417, 0.0008483438020955513, 0.0008590146535000079, 0.0008698197276917526, 0.0008807607129856105, 0.0008918393189327957, 0.0009030572765880234, 0.0009144163387800033, 0.0009259182803853122, 0.0009375648986057256, 0.0009493580132490386, 0.0009612994670134037, 0.0009733911257752697, 0.0009856348788809177, 0.0009980326394416796, 0.001010586344632869, 0.00102329795599646, 0.001036169459747592, 0.001049202867084912, 0.001062400214504834, 0.001075763564119744, 0.001089295003980203, 0.001102996648401224, 0.001116870638292621, 0.001130919141493541, 0.001145144353111194, 0.001159548495863834, 
0.001174133820428073, 0.001188902605790547, 0.001203857159604017, 0.001218999818547943, 0.001234332948693589, 0.001249858945873741, 0.001265580236057042, 0.001281499275727069, 0.001297618552266161, 0.001313940584344069, 0.001330467922311514, 0.001347203148598678, 0.001364148878118715, 0.001381307758676344, 0.001398682471381555, 0.001416275731068563, 0.001434090286719986, 0.001452128921896387, 0.001470394455171216, 0.001488889740571203, 0.001507617668022326, 0.001526581163801349, 0.001545783190993069, 0.001565226749953304, 0.001584914878777695, 0.001604850653776431, 0.001625037189954914, 0.001645477641500489, 0.001666175202275302, 0.001687133106315327, 0.001708354628335715, 0.001729843084242454, 0.001751601831650498, 0.001773634270408403, 0.001795943843129542, 0.001818534035730048, 0.001841408377973473, 0.001864570444022329, 0.001888023852996565, 0.001911772269539043, 0.001935819404388168, 0.001960169014957685, 0.001984824905923782, 0.00200979092981959, 0.002035070987637131, 0.002060669029436876, 0.002086589054964934, 0.002112835114278028, 0.002139411308376326, 0.002166321789844222, 0.002193570763499198, 0.002221162487048825, 0.002249101271756043, 0.002277391483112806, 0.002306037541522194, 0.002335043922989115, 0.002364415159819686, 0.002394155841329419, 0.002424270614560308, 0.002454764185006938, 0.002485641317351734, 0.002516906836209441, 0.002548565626880992, 0.002580622636116842, 0.002613082872889901, 0.002645951409178209, 0.00267923338075743, 0.002712933988003337, 0.002747058496704377, 0.002781612238884461, 0.002816600613636105, 0.002852029087964048, 0.002887903197639483, 0.002924228548065035, 0.002961010815150609, 0.002998255746200276, 0.003035969160810286, 0.003074156951778398, 0.003112825086024647, 0.003151979605523678, 0.003191626628248826, 0.003231772349128053, 0.003272423041011921, 0.003313585055653745, 0.003355264824702056, 0.003397468860705566, 0.003440203758130766, 0.003483476194392321, 0.003527292930896439, 0.003571660814097341, 0.003616586776567048, 
0.003662077838078596, 0.003708141106702898, 0.003754783779919395, 0.00380201314574067, 0.003849836583851225, 0.00389826156676056, 0.003947295660970774, 0.003996946528158852, 0.004047221926373804, 0.00409812971124889, 0.004149677837229069, 0.004201874358813898, 0.004254727431816073, 0.004308245314635779, 0.004362436369551091, 0.004417309064024591, 0.004472871972026428, 0.004529133775374025, 0.004586103265088616, 0.00464378934276888, 0.004702201021981813, 0.004761347429671132, 0.004821237807583375, 0.004881881513711935, 0.00494328802375928, 0.005005466932617535, 0.005068427955867712, 0.005132180931297792, 0.005196735820439887, 0.00526210271012677, 0.005328291814067941, 0.005395313474445553, 0.005463178163530398, 0.005531896485318216, 0.005601479177186597, 0.005671937111572713, 0.005743281297672157, 0.005815522883159153, 0.005888673155928423, 0.005962743545858904, 0.006037745626599722, 0.006113691117378584, 0.006190591884832933, 0.00626845994486415, 0.006347307464515032, 0.006427146763870943, 0.00650799031798484, 0.006589850758826525, 0.006672740877256438, 0.006756673625024219, 0.006841662116792471, 0.00692771963218594, 0.007014859617866486, 0.007103095689634175, 0.007192441634554728, 0.007282911413113818, 0.007374519161398405, 0.007467279193305528, 0.007561206002778911, 0.007656314266073621, 0.007752618844049296, 0.007850134784492181, 0.007948877324466357, 0.008048861892694607, 0.008150104111969127, 0.008252619801592656, 0.00835642497985027, 0.008461535866512259, 0.008567968885368524, 0.008675740666794796, 0.008784868050351188, 0.008895368087413398, 0.009007258043837027, 0.009120555402655422, 0.009235277866811395, 0.009351443361923378, 0.00946907003908632, 0.009588176277707833, 0.009708780688380042, 0.00983090211578748, 0.009954559641651635, 0.01007977258771251, 0.01020656051874765, 0.01033494324562928, 0.01046494082841963, 0.01059657357950552, 0.0107298620667721, 0.01086482711681667, 0.01100148981820292, 0.01113987152475597, 0.01127999385889901, 0.01142187871503183, 
0.01156554826295185, 0.01171102495131826, 0.01185833151115958, 0.01200749095942548, 0.01215852660258322, 0.01231146204025933, 0.01246632116892708, 0.01262312818564042, 0.01278190759181471, 0.01294268419705522, 0.01310548312303362, 0.01327032980741329, 0.01343725000782408, 0.01360626980588688, 0.01377741561128903, 0.01395071416591081, 0.01412619254800395, 0.01430387817642269, 0.01448379881490797, 0.01466598257642559, 0.01485045792755895, 0.01503725369295697, 0.01522639905983805, 0.01541792358255059, 0.01561185718719091, 0.0158082301762793, 0.01600707323349481, 0.01620841742846971, 0.0164122942216441, 0.01661873546918168, 0.01682777342794741, 0.01703944076054761, 0.01725377054043367, 0.01747079625706974, 0.01769055182116559, 0.01791307156997521, 0.01813839027266205, 0.01836654313573184, 0.01859756580853362, 0.01883149438883002, 0.01906836542843765, 0.01930821593893835, 0.01955108339746239, 0.01979700575254428, 0.02004602143005232, 0.02029816933919275, 0.02055348887858936, 0.02081201994243967, 0.02107380292674837, 0.02133887873563936, 0.02160728878774707, 0.02187907502268823, 0.02215427990761502, 0.02243294644385061, 0.02271511817360823, 0.02300083918679476, 0.02329015412789976, 0.02358310820297145, 0.02387974718668007, 0.02418011742947034, 0.02448426586480378, 0.0247922400164922, 0.02510408800612343, 0.02541985856058028, 0.02573960101965432, 0.02606336534375531, 0.02639120212171757, 0.02672316257870469, 0.02705929858421342, 0.02739966266017851, 0.0277443079891793, 0.02809328842274961, 0.02844665848979223, 0.02880447340509907, 0.0291667890779786, 0.02953366212099186, 0.02990514985879821, 0.03028131033711252, 0.0306622023317748, 0.03104788535793416, 0.03143841967934811, 0.0318338663177989, 0.03223428706262838, 0.03263974448039255, 0.03305030192463786, 0.03346602354580026, 0.03388697430122881, 0.03431321996533555, 0.03474482713987265, 0.03518186326433927, 0.03562439662651899, 0.03607249637314997, 0.03652623252072935, 0.03698567596645329, 0.03745089849929494, 
0.03792197281122155, 0.03839897250855272, 0.03888197212346171, 0.03937104712562098, 0.03986627393399464, 0.04036772992877905, 0.04087549346349359, 0.04138964387722367, 0.04191026150701745, 0.04243742770043879, 0.04297122482827795, 0.04351173629742211, 0.04405904656388811, 0.04461324114601853, 0.04517440663784437, 0.04574263072261542, 0.04631800218650093, 0.04690061093246284, 0.04749054799430298, 0.0480879055508875, 0.04869277694054995, 0.04930525667567554, 0.04992544045746908, 0.05055342519090819, 0.05118930899988512, 0.05183319124253874, 0.05248517252677941, 0.05314535472600935, 0.05381384099504029, 0.05449073578621182, 0.05517614486571221, 0.05587017533010455, 0.05657293562306103, 0.05728453555230714, 0.0580050863067796, 0.05873470047399974, 0.05947349205766558, 0.06022157649546525, 0.06097907067711407, 0.06174609296261908, 0.06252276320077291, 0.06330920274788034, 0.06410553448672075, 0.06491188284574845, 0.06572837381853505, 0.0665551349834562, 0.06739229552362594, 0.068239986247082, 0.06909833960722449, 0.06996748972351226, 0.0708475724024193, 0.07173872515865468, 0.0726410872366497, 0.07355479963231491, 0.07448000511507108, 0.07541684825015721, 0.07636547542121913, 0.07732603485318246, 0.07829867663541262, 0.07928355274516699, 0.08028081707134141, 0.08129062543851576, 0.08231313563130199, 0.083348507418998, 0.0843969025805521, 0.08545848492984123, 0.08653342034126721, 0.08762187677567519, 0.08872402430659748, 0.08984003514682809, 0.09097008367533138, 0.09211434646448893, 0.09327300230768971, 0.0944462322472665, 0.09563421960278425, 0.09683714999968428, 0.09805521139828838, 0.09928859412316843, 0.1005374908928844, 0.1018020968500976, 0.1030826095920615, 0.1043792292014973, 0.1056921582778571, 0.1070216019689802, 0.1083677680031481, 0.1097308667215425, 0.1111111111111111, 0.1125087168378483, 0.1139239022804922, 0.1153568885646473, 0.1168078995973361, 0.1182771621019844, 0.119764905653848, 0.1212713627158836, 0.122796768675072, 0.1243413618791979, 
0.1259053836740918, 0.1274890784413414, 0.1290926936364759, 0.1307164798276323, 0.1323606907347065, 0.1340255832689979, 0.1357114175733522, 0.1374184570628095, 0.1391469684657628, 0.1408972218656356, 0.1426694907430824, 0.1444640520187213, 0.1462811860964024, 0.1481211769070226, 0.1499843119528893, 0.151870882352644, 0.1537811828867502, 0.1557155120435526, 0.1576741720659176, 0.1596574689984584, 0.1616657127353557, 0.1636992170687795, 0.165758299737919, 0.1678432824786304, 0.1699544910737089, 0.1720922554037926, 0.1742569094989077, 0.1764487915906604, 0.1786682441650871, 0.1809156140161679, 0.1831912523000142, 0.1854955145897381, 0.1878287609310102, 0.1901913558983184, 0.1925836686519329, 0.1950060729955887, 0.1974589474348932, 0.1999426752364677, 0.2024576444878344, 0.2050042481580557, 0.2075828841591358, 0.2101939554081967, 0.212837869890433, 0.2155150407228617, 0.2182258862188721, 0.220970829953588, 0.223750300830053, 0.2265647331462462, 0.2294145666629431, 0.2323002466724285, 0.235222224068075, 0.2381809554147955, 0.2411769030203826, 0.2442105350077448, 0.2472823253880521, 0.2503927541348009, 0.2535423072588116, 0.2567314768841672, 0.2599607613251103, 0.2632306651639046, 0.2665416993296776, 0.2698943811782543, 0.2732892345729945, 0.276726789966648, 0.2802075844842389, 0.2837321620069919, 0.2873010732573163, 0.2909148758848558, 0.2945741345536231, 0.2982794210302303, 0.3020313142732272, 0.3058304005235675, 0.3096772733962057, 0.3135725339728555, 0.3175167908959073, 0.321510660463529, 0.3255547667259672, 0.329649741583052, 0.3337962248829369, 0.3379948645220739, 0.3422463165464479, 0.3465512452540884, 0.3509103232988632, 0.355324231795586, 0.3597936604264403, 0.3643193075487415, 0.3689018803040612, 0.3735420947287145, 0.3782406758656464, 0.3829983578777197, 0.3878158841624268, 0.3926940074680526, 0.3976334900112867, 0.4026351035963268, 0.4076996297354719, 0.4128278597712338, 0.4180205949999905, 0.4232786467971847, 0.4286028367441082, 0.4339939967562728, 
0.4394529692133975, 0.4449806070910365, 0.4505777740938529, 0.456245344790579, 0.4619842047506679, 0.4677952506826622, 0.4736793905743136, 0.4796375438344494, 0.4856706414366396, 0.4917796260646597, 0.4979654522597849, 0.5042290865699453, 0.5105715077007433, 0.5169937066683854, 0.5234966869545272, 0.5300814646630669, 0.5367490686789204, 0.5435005408287785, 0.5503369360439014, 0.5572593225249514, 0.564268781908898, 0.5713664094380324, 0.5785533141310945, 0.5858306189565655, 0.593199461008131, 0.6006609916823519, 0.6082163768585783, 0.6158667970811138, 0.6236134477436839, 0.6314575392762164, 0.6394002973339701, 0.6474429629890532, 0.6555867929243353, 0.6638330596298135, 0.6721830516014389, 0.6806380735424429, 0.689199446567208, 0.6978685084076851, 0.7066466136224254, 0.7155351338082293, 0.7245354578144569, 0.7336489919600483, 0.7428771602532525, 0.7522214046141414, 0.7616831850999092, 0.7712639801330039, 0.7809652867321445, 0.7907886207462219, 0.8007355170911628, 0.8108075299897597, 0.8210062332145167, 0.831333220333565, 0.8417901049596499, 0.8523785210022697, 0.8631001229229753, 0.8739565859938768, 0.88494960655942, 0.8960809023014306, 0.9073522125075163, 0.9187652983428305, 0.9303219431252516, 0.9420239526040424, 0.9538731552419885, 0.9658714025011113, 0.978020569131957, 0.9903225534665263, 1.002779277714903, 1.015392688265592, 1.028164755989654, 1.04109747654866, 1.054192870706503, 1.067452984645168, 1.080879890284432, 1.09447568560562, 1.108242494979414, 1.122182469497784, 1.136297787310112, 1.150590653963522, 1.165063302747508, 1.17971799504289, 1.194557020675151, 1.209582698272239, 1.224797375626845, 1.240203430063263, 1.255803268808843, 1.271599329370125, 1.287594079913711, 1.303790019651913, 1.320189679233266, 1.336795621137949, 1.353610440078164, 1.370636763403585, 1.387877251511868, 1.405334598264358, 1.423011531407005, 1.44091081299657, 1.459035239832225, 1.477387643892532, 1.495970892777968, 1.514787890158974, 1.533841576229677, 1.553134928167294, 
1.572670960597312, 1.59245272606455, 1.612483315510103, 1.632765858754327, 1.653303524985872, 1.674099523256866, 1.695157102984353, 1.716479554458003, 1.738070209354241, 1.759932441256826, 1.782069666183964, 1.804485343122095, 1.82718297456634, 1.850166107067794, 1.873438331787672, 1.897003285058429, 1.920864648951963, 1.945026151854921, 1.969491569051294, 1.994264723312295, 2.019349485493674, 2.044749775140564, 2.070469561099891, 2.096512862140543, 2.122883747581296, 2.149586337926646, 2.176624805510669, 2.204003375148926, 2.23172632479863, 2.25979798622707, 2.288222745688449, 2.31700504460927, 2.346149380282292, 2.375660306569261, 2.405542434612453, 2.435800433555164, 2.466439031271296, 2.497463015104069, 2.528877232614081, 2.560686592336733, 2.592896064549191, 2.625510682047028, 2.658535540930572, 2.69197580140122, 2.725836688567708, 2.760123493262538, 2.794841572868711, 2.829996352156789, 2.865593324132563, 2.901638050895333, 2.938136164506978, 2.975093367872018, 3.012515435628656, 3.050408215051122, 3.088777626963299, 3.127629666663851, 3.166970404863035, 3.206805988631226, 3.247142642359438, 3.287986668731887, 3.32934444971078, 3.371222447533551, 3.413627205722548, 3.456565350107518, 3.500043589860885, 3.544068718546056, 3.588647615178969, 3.633787245302908, 3.679494662076924, 3.725777007377885, 3.772641512916395, 3.820095501366803, 3.868146387511339, 3.916801679398727, 3.966068979517323, 4.015955985982993, 4.066470493742004, 4.117620395788955, 4.16941368440011, 4.2218584523822, 4.274962894336904, 4.328735307941336, 4.383184095244506, 4.438317763980215, 4.494144928896382, 4.550674313101089, 4.607914749425646, 4.665875181804668, 4.724564666673642, 4.783992374383995, 4.844167590635945, 4.905099717929477, 4.966798277033429, 5.029272908473196, 5.092533374037057, 5.156589558301442, 5.221451470175484, 5.287129244464856, 5.353633143455424, 5.420973558516717, 5.489161011725576, 5.558206157510313, 5.628119784315407, 5.698912816287301, 5.770596314981277, 
5.843181481089828, 5.916679656192855, 5.99110232452973, 6.066461114793804, 6.142767801949387, 6.220034309071573, 6.298272709209319, 6.377495227271805, 6.457714241938665, 6.538942287594162, 6.621192056285666, 6.704476399706895, 6.788808331205931, 6.874201027818653, 6.960667832327657, 7.048222255347039, 7.13687797743355, 7.226648851224112, 7.317548903600411, 7.409592337880579, 7.502793536038461, 7.597167060950913, 7.692727658673182, 7.789490260743095, 7.887469986514104, 7.986682145517674, 8.087142239855527, 8.18886596662176, 8.291869220355657, 8.396168095525187, 8.501778889041766, 8.608718102806774, 8.717002446289905, 8.826648839140148, 8.937674413829479, 9.050096518329793, 9.16393271882368, 9.279200802449031, 9.395918780078444, 9.514104889133407, 9.633777596433879, 9.75495560108388, 9.877657837393148, 10.00190347783576, 10.12771193604583, 10.25510286985088, 10.38409618434353, 10.51471203499157, 10.64697083078743, 10.78089323743705, 10.9165001805889, 11.05381284910377, 11.19285269836546, 11.3336414536333, 11.47620111343676, 11.62055395301269, 11.766722527786, 11.91472967689384, 12.0645985267544, 12.2163524946804, 12.37001529253804, 12.52561093045215, 12.68316372055767, 12.84269828079863, 13.00423953877466, 13.16781273563593, 13.33344343002728, 13.50115750208166, 13.67098115746406, 13.84294093146616, 14.01706369315246, 14.19337664955878, 14.37190734994322, 14.55268369009096, 14.73573391667296, 14.9210866316595, 15.10877079678946, 15.29881573809541, 15.49125115048613, 15.68610710238633, 15.88341404043489, 16.08320279424235, 16.28550458120791, 16.4903510113974, 16.69777409248233, 16.90780623474107, 17.12048025612321, 17.33582938737723, 17.55388727724305, 17.77468799770963, 17.99826604933871, 18.22465636665578, 18.45389432360846, 18.68601573909394, 18.92105688255568, 19.15905447965048, 19.40004571798716, 19.64406825293694, 19.89116021351741, 20.14136020835015, 20.39470733169333, 20.65124116955048, 20.91100180585561, 21.17402982873666, 21.4403663368574, 21.710052945839, 
21.98313179476286, 22.25964555275465, 22.53963742565166, 22.82315116275371, 23.11023106365896, 23.40092198518608, 23.69526934838289, 23.99331914562379, 24.29511794779602, 24.60071291157635, 24.9101517867997, 25.22348292391979, 25.54075528156432, 25.8620184341847, 26.18732257980207, 26.5167185478511, 26.85025780712186, 27.18799247380226, 27.52997531962115, 27.87625978009384, 28.22689996287192, 28.58195065619722, 28.94146733746298, 29.30550618188214, 29.67412407126468, 30.04737860290589, 30.42532809858562, 30.80803161368155, 31.19554894639661, 31.58794064710233, 31.98526802780037, 32.38759317170224, 32.79497894293026, 33.20748899634006, 33.62518778746665, 34.04814058259606, 34.47641346896293, 34.91007336507713, 35.34918803117979, 35.79382607983072, 36.24405698662981, 36.6999511010721, 37.16157965754056, 37.62901478643639, 38.10232952544938, 38.58159783097057, 39.06689458964765, 39.55829563008657, 40.05587773469975, 40.55971865170321, 41.06989710726547, 41.58649281780802, 42.10958650246172, 42.63925989567914, 43.17559576000542, 43.71867789901071, 44.26859117038399, 44.82542149919279, 45.38925589130895, 45.9601824470032, 46.53829037471159, 47.12367000497384, 47.71641280454821, 48.31661139070318, 48.92435954568877, 49.53975223139084, 50.16288560416839, 50.79385702987877, 51.43276509909106, 52.07970964249078, 52.73479174647921, 53.3981137689677, 54.06977935537191, 54.74989345480631, 55.43856233648245, 56.13589360631439, 56.84199622373158, 57.55698051870466, 58.28095820898449, 59.0140424175579, 59.75634769032416, 60.50799001399216, 61.26908683420431, 62.03975707388733, 62.82012115183391, 63.6103010015191, 64.41042009015183, 65.22060343796765, 66.04097763776313, 66.8716708746758, 67.71281294621423, 68.56453528253819, 69.42697096699565, 70.30025475691713, 71.1845231046712, 72.07991417898656, 72.98656788653997, 73.90462589381798, 74.83423164925216, 75.77553040563284, 76.72866924280603, 77.69379709065397, 78.67106475236659, 79.66062492800444, 80.66263223835793, 
81.67724324910822, 82.70461649528967, 83.74491250606236, 84.79829382979463, 85.8649250594611, 86.94497285836184, 88.03860598616245, 89.14599532526432, 90.26731390750489, 91.40273694119361, 92.55244183848987, 93.71660824312261, 94.89541805846129, 96.08905547593798, 97.29770700382733, 98.52156149638991, 99.76081018337982 + ], + "paw_data": { + "aug_integrals": [ + -0.0981138722783857, -0.0958004681236609, 0.0, 0.0, -0.0958004681236609, -0.08747544885960724, 0.0, 0.0, 0.0, 0.0, 0.2390827668467262, 0.1572942928639029, 0.0, 0.0, 0.1572942928639029, 0.1034693219640171 + ], + "occupations": [ + 2.0, 0.0, 5.0, 0.0 + ], + "ae_core_charge_density": [ + 432.4173170426889, 432.3939778343332, 432.3706398856836, 432.3473031966718, 432.3238386430548, 432.2999781420502, 432.2757952235382, 432.2512790491502, 432.2264325054321, 432.2012583723152, 432.1757593036001, 432.1499378308868, 432.1237963658064, 432.0973372021177, 432.0705625177509, 432.0434743767966, 432.016074731431, 431.988365423787, 431.9603481877731, 431.9320246508303, 431.9033963356471, 431.8744646618139, 431.8452309474323, 431.8156964106723, 431.7858621712798, 431.7557292520436, 431.7252985802077, 431.6945709888446, 431.6635472181813, 431.6322279168805, 431.6006136432866, 431.5687048666204, 431.5365019681411, 431.5040052422644, 431.4712148976402, 431.4381310581989, 431.404753764154, 431.3710829729695, 431.3371185602941, 431.3028603208539, 431.2683079693211, 431.2334611411384, 431.198319393317, 431.1628822051994, 431.1271489791886, 431.0911190414541, 431.0547916425968, 431.0181659582914, 430.9812410898962, 430.9440160650328, 430.9064898381438, 430.8686612910144, 430.830529233272, 430.7920924028587, 430.7533494664721, 430.7142990199899, 430.6749395888611, 430.6352696284739, 430.5952875245022, 430.5549915932219, 430.5143800818126, 430.4734511686265, 430.4322029634397, 430.3906335076815, 430.348740774635, 430.3065226696264, 430.2639770301823, 430.2211016261729, 430.1778941599314, 430.1343522663506, 430.0904735129657, 
430.0462554000109, 430.0016953604586, 429.9567907600406, 429.9115388972446, 429.865937003301, 429.8199822421425, 429.7736717103497, 429.727002437077, 429.6799713839591, 429.6325754450042, 429.584811446466, 429.5366761466974, 429.4881662359915, 429.439278336399, 429.3900090015367, 429.3403547163734, 429.2903118970008, 429.2398768903893, 429.1890459741247, 429.1378153561346, 429.0861811743927, 429.0341394966101, 428.9816863199125, 428.9288175704962, 428.8755291032787, 428.8218167015233, 428.7676760764561, 428.7131028668642, 428.6580926386785, 428.6026408845472, 428.5467430233869, 428.4903943999252, 428.4335902842253, 428.3763258711944, 428.3185962800859, 428.2603965539776, 428.2017216592414, 428.1425664849971, 428.0829258425493, 428.0227944648177, 427.9621670057458, 427.9010380396987, 427.8394020608478, 427.7772534825377, 427.7145866366458, 427.6513957729225, 427.5876750583186, 427.5234185763017, 427.458620326154, 427.3932742222631, 427.3273740933923, 427.2609136819417, 427.1938866431941, 427.1262865445453, 427.0581068647265, 426.9893409930068, 426.9199822283857, 426.8500237787712, 426.7794587601414, 426.7082801957, 426.6364810150096, 426.5640540531178, 426.4909920496661, 426.4172876479837, 426.342933394175, 426.267921736187, 426.1922450228629, 426.1158955029873, 426.0388653243097, 425.9611465325655, 425.8827310704717, 425.8036107767171, 425.7237773849354, 425.6432225226631, 425.5619377102885, 425.4799143599836, 425.3971437746213, 425.3136171466828, 425.2293255571445, 425.1442599743611, 425.0584112529248, 424.9717701325167, 424.8843272367432, 424.7960730719553, 424.7069980260598, 424.6170923673124, 424.5263462430972, 424.4347496786945, 424.342292576029, 424.2489647124154, 424.1547557392756, 424.0596551808538, 423.963652432911, 423.866736761404, 423.768897301163, 423.6701230545337, 423.5704028900272, 423.4697255409416, 423.3680796039721, 423.2654535378165, 423.1618356617485, 423.0572141541959, 422.9515770512938, 422.8449122454214, 422.7372074837403, 422.6284503666943, 
422.5186283465204, 422.4077287257285, 422.2957386555714, 422.1826451345089, 422.0684350066419, 421.9530949601499, 421.8366115257028, 421.7189710748598, 421.6001598184662, 421.4801638050156, 421.3589689190184, 421.2365608793468, 421.1129252375629, 420.9880473762466, 420.8619125072911, 420.7345056702038, 420.60581173038, 420.4758153773682, 420.3445011231281, 420.2118533002606, 420.0778560602412, 419.9424933716311, 419.805749018273, 419.6676065974879, 419.5280495182387, 419.3870609993015, 419.2446240674113, 419.1007215553984, 418.9553361003209, 418.8084501415689, 418.6600459189742, 418.5101054708981, 418.3586106323065, 418.2055430328476, 418.0508840948972, 417.8946150316123, 417.736716844965, 417.577170323764, 417.415956041677, 417.2530543552267, 417.0884454017932, 416.9221090975979, 416.754025135678, 416.584172983862, 416.4125318827196, 416.2390808435223, 416.0637986461829, 415.8866638371906, 415.7076547275472, 415.5267493906806, 415.3439256603676, 415.1591611286433, 414.9724331437017, 414.7837188078033, 414.5929949751608, 414.4002382498369, 414.2054249836304, 414.0085312739548, 413.8095329617295, 413.6084056292478, 413.4051245980626, 413.1996649268595, 412.9920014093306, 412.7821085720583, 412.5699606723814, 412.3555316962822, 412.1387953562631, 411.9197250892297, 411.6982940543793, 411.4744751310889, 411.2482409168148, 411.0195637249913, 410.7884155829402, 410.5547682297871, 410.3185931143842, 410.0798613932445, 409.838543928485, 409.59461128578, 409.3480337323295, 409.0987812348355, 408.8468234574991, 408.5921297600264, 408.3346691956528, 408.0744105091871, 407.8113221350698, 407.5453721954547, 407.2765284983086, 407.0047585355343, 406.7300294811176, 406.4523081892978, 406.1715611927644, 405.8877547008832, 405.6008545979473, 405.3108264414633, 405.0176354604649, 404.721246553863, 404.421624288831, 404.1187328992217, 403.812536284032, 403.5029980058953, 403.1900812896283, 402.8737490208131, 402.5539637444259, 402.2306876635168, 401.9038826379342, 401.5735101831049, 
401.2395314688647, 400.9019073183429, 400.5605982069115, 400.2155642611837, 399.8667652580854, 399.5141606239828, 399.1577094338793, 398.7973704106849, 398.4331019245505, 398.0648619922833, 397.6926082768348, 397.3162980868667, 396.935888376406, 396.551335744575, 396.1625964354183, 395.7696263378141, 395.372380985481, 394.9708155570828, 394.5648848764305, 394.1545434127885, 393.7397452812867, 393.3204442434399, 392.8965937077848, 392.4681467306256, 392.0350560169074, 391.5972739212049, 391.154752448844, 390.70744325715, 390.2552976568314, 389.7982666135025, 389.3363007493465, 388.8693503449249, 388.397365341139, 387.9202953413412, 387.4380896136108, 386.9506970931874, 386.4580663850734, 385.9601457668094, 385.4568831914229, 384.9482262905613, 384.4341223778079, 383.9145184521888, 383.3893612018738, 382.8585970080823, 382.3221719491875, 381.7800318050357, 381.2321220614805, 380.6783879151325, 380.1187742783458, 379.5532257844249, 378.9816867930757, 378.4041013960953, 377.8204134233117, 377.2305664487788, 376.6345037972293, 376.0321685507936, 375.4235035559921, 374.8084514310005, 374.1869545732083, 373.5589551670575, 372.9243951921847, 372.2832164318623, 371.6353604817483, 370.9807687589579, 370.3193825114498, 369.6511428277454, 368.9759906469849, 368.2938667693184, 367.604711866658, 366.9084664937752, 366.2050710997665, 365.4944660398878, 364.776591587764, 364.0513879479859, 363.3187952690952, 362.5787536569689, 361.8312031886083, 361.0760839263389, 360.3133359324354, 359.5428992841667, 358.7647140892793, 357.9787205019215, 357.1848587390121, 356.3830690970727, 355.5732919695157, 354.755467864406, 353.9295374227004, 353.0954414369677, 352.2531208706079, 351.4025168775622, 350.5435708225347, 349.6762243017243, 348.8004191640742, 347.9160975330549, 347.0232018289732, 346.1216747918249, 345.2114595046943, 344.2924994177022, 343.3647383725211, 342.4281206274488, 341.4825908830565, 340.5280943084137, 339.5645765678969, 338.591983848583, 337.6102628882446, 
336.6193610039352, 335.6192261211839, 334.6098068037966, 333.5910522842684, 332.5629124948176, 331.5253380990349, 330.4782805241604, 329.4216919939869, 328.3555255623905, 327.2797351475005, 326.1942755664977, 325.0991025710521, 323.9941728833977, 322.8794442330458, 321.7548753941381, 320.6204262234381, 319.476057698961, 318.321731959241, 317.1574123432322, 315.9830634308488, 314.7986510841288, 313.6041424890295, 312.3995061978454, 311.1847121722419, 309.9597318269081, 308.72453807381, 307.4791053670481, 306.2234097483071, 304.9574288928856, 303.6811421563079, 302.3945306214957, 301.0975771464937, 299.7902664127394, 298.4725849738592, 297.1445213049845, 295.8060658525633, 294.4572110846601, 293.0979515417224, 291.728283887796, 290.3482069621759, 288.9577218314624, 287.5568318420101, 286.1455426727434, 284.7238623883125, 283.2918014925717, 281.8493729823464, 280.3965924014635, 278.9334778950198, 277.4600502638497, 275.9763330191723, 274.4823524373717, 272.978137614886, 271.4637205231609, 269.9391360636375, 268.4044221227299, 266.859619626753, 265.3047725967581, 263.7399282032343, 262.1651368206242, 260.5804520816162, 258.9859309311537, 257.3816336801184, 255.767624058632, 254.1439692689203, 252.5107400376915, 250.8680106679614, 249.2158590902721, 247.5543669132432, 245.8836194733886, 244.2037058841427, 242.5147190840183, 240.8167558838383, 239.1099170129671, 237.3943071644685, 235.6700350391249, 233.937213388232, 232.1959590551012, 230.4463930151905, 228.6886404147775, 226.9228306081067, 225.1490971929141, 223.3675780442539, 221.5784153465411, 219.781755623718, 217.9777497674657, 216.1665530633608, 214.3483252148941, 212.5232303652587, 210.69143711681, 208.8531185481129, 207.0084522284733, 205.1576202298634, 203.3008091361449, 201.4382100494921, 199.5700185939218, 197.6964349158276, 195.8176636814281, 193.9339140710276, 192.0453997699917, 190.1523389563487, 188.2549542849094, 186.35347286782, 184.4481262514475, 182.539150389504, 180.6267856123237, 178.7112765921916, 
176.7928723046417, 174.8718259856334, 172.9483950845206, 171.0228412127316, 169.0954300880746, 167.1664314745924, 165.2361191178913, 163.3047706758667, 161.372667644764, 159.4400952804987, 157.5073425151818, 155.5747018687887, 153.6424693559173, 151.7109443875912, 149.7804296680558, 147.8512310865365, 145.9236576039203, 143.9980211343349, 142.0746364216047, 140.1538209105655, 138.2358946132318, 136.3211799698133, 134.410001704585, 132.5026866766268, 130.5995637254482, 128.7009635115296, 126.8072183518155, 124.918662050203, 123.0356297230829, 121.1584576199908, 119.2874829394434, 117.4230436400398, 115.5654782469175, 113.715125653666, 111.8723249198048, 110.0374150639491, 108.2107348527927, 106.3926225860492, 104.5834158775051, 102.7834514323444, 100.9930648209204, 99.21259024915756, 97.44236032577818, 95.68270582656183, 93.93395545585113, 92.1964356055337, 90.47047011173781, 88.75638000949075, 87.05448328560104, 85.36509463003202, 83.68852518604967, 82.02508229943425, 80.37506926705531, 78.73878508512277, 77.11652419743015, 75.50857624392007, 73.91522580990897, 72.33675217631473, 70.77342907124226, 69.22552442328475, 67.69330011690923, 66.17701175029879, 64.67690839602999, 63.19323236497133, 61.72621897378787, 60.27609631644665, 58.8430850401171, 57.42739812586293, 56.02924067452655, 54.64880969820268, 53.28629391770263, 51.94187356640708, 50.61572020090274, 49.30799651879894, 48.01885618411092, 46.74844366059733, 45.4968940534304, 44.26433295957098, 43.05087632721495, 41.85663032466517, 40.68169121897699, 39.52614526471191, 38.39006860312312, 37.27352717208441, 36.17657662705784, 35.09926227338267, 34.04161901015097, 33.00367128591779, 31.98543306647745, 30.98690781491555, 30.00808848412919, 29.04895752198476, 28.10948688926185, 27.18963809050893, 26.28936221791191, 25.40860000825367, 24.5472819130165, 23.70532818165361, 22.88264895803031, 22.07914439000658, 21.2947047521071, 20.52921058119541, 19.78253282504089, 19.05453300363869, 18.3450633831132, 
17.65396716200685, 16.981078669727, 16.32622357689356, 15.68921911730229, 15.06987432118813, 14.46799025944546, 13.88336029843341, 13.31577036496638, 12.76499922106319, 12.23081874800075, 11.71299423919334, 11.21128470139277, 10.72544316368101, 10.2552169937041, 9.800348220573568, 9.360573863841916, 8.935626267938826, 8.525233441437106, 8.12911940050138, 7.747004515857192, 7.378605862605886, 7.023637572029561, 6.681811185549106, 6.35283600839151, 6.03641946402174, 5.73226744773368, 5.440084678867499, 5.159575050940505, 4.890441978981704, 4.632388743364884, 4.385118829442766, 4.148336262294039, 3.921745935906878, 3.70505393613671, 3.497967856791962, 3.300197108220276, 3.111453217788199, 2.931450121670167, 2.759904447387738, 2.596535786566776, 2.441066957409634, 2.293224256410106, 2.152737698871861, 2.019341247825691, 1.89277303097685, 1.772775545351873, 1.659095849353034, 1.551485741969014, 1.449701928931891, 1.353506175652508, 1.262665446809548, 1.176952032510784, 1.096143660988739, 1.020023597836729, 0.9483807318345475, 0.8810096474562447, 0.8177106841946133, 0.7582899828783438, 0.7025595191980667, 0.6503371246960575, 0.6014464955116386, 0.555717189209422, 0.5129846100507728, 0.4730899830999224, 0.4358803175847022, 0.401208359958137, 0.3689325371306671, 0.3389168903637178, 0.3110310003336147, 0.2851499038902753, 0.2611540030479366, 0.2389289667551567, 0.2183656259986655, 0.1993598628003318, 0.18181249366855, 0.1656291480649507, 0.1507201424444393, 0.1370003504213675, 0.1243890696072317, 0.1128098856557154, 0.102190534039393, 0.0924627600689905, 0.08356217765097027, 0.07542812726249118, 0.06800353360458213, 0.06123476337488273, 0.05507148358060911, 0.04946652079069661, 0.04437572170348437, 0.03975781538296561, 0.03557427749271858, 0.03178919683225922, 0.02836914445588777, 0.02528304562927302, 0.02250205485415371, 0.01999943416679275, 0.01775043489130141, 0.01573218300479894, 0.01392356824769696, 0.01230513708930514, 0.01085898963655852, 0.009568680552049758, 
0.008419124026807678, 0.007396502833470884, 0.006488181466727873, 0.005682623360197713, 0.004969312152350846, 0.004338676958661677, 0.003782021592971534, 0.003291457668040558, 0.002859841493493733, 0.002480714678817222, 0.002148248339730982, 0.001857190798135248, 0.001602818658876824, 0.001380891140776342, 0.001187607534659279, 0.001019567657498575, 0.0008737351691545322, 0.0007474036165334803, 0.0006381650692224935, 0.0005438812107308944, 0.0004626567503164404, 0.000392815021929153, 0.0003328756390014156, 0.0002815340765825461, 0.0002376430555922307, 0.0002001956076843599, 0.0001683097033064925, 0.0001412143299479, 0.0001182369122312489, 9.879197036222999e-05, 8.237091845417973e-05, 6.853290934058852e-05, 5.689663763068833e-05, 4.713301790935728e-05, 3.895866009347679e-05, 3.213006899785298e-05, 2.643850010402839e-05, 2.170540833787091e-05, 1.777843132373987e-05, 1.452785307496484e-05, 1.184349838661742e-05, 9.632012304740086e-06, 7.81448294711638e-06, 6.324369638058564e-06, 5.105701789771296e-06, 4.11151721751365e-06, 3.302511591401005e-06, 2.645873631104524e-06, 2.11428303138203e-06, 1.685050882216182e-06, 1.339384283183297e-06, 1.061758874644829e-06, 8.393849586540455e-07, 6.617545331358513e-07, 5.202580637270399e-07, 4.078611833673383e-07, 3.188327358395003e-07, 2.485166764492469e-07, 1.931413209178936e-07, 1.496603020863413e-07, 1.156203626197759e-07, 8.905178958196727e-08, 6.837789209546528e-08, 5.23404444173684e-08, 3.993847123240079e-08, 3.037814686975462e-08, 2.3031922037771e-08, 1.740528657093105e-08, 1.310983011627949e-08, 9.841479675697069e-09, 7.362978015756317e-09, 5.48982425584666e-09, 4.079031119715985e-09, 3.020165659284195e-09, 2.228234636852052e-09, 1.638054746604005e-09, 1.199813721079887e-09, 8.755830724529808e-10, 6.365884756451236e-10, 4.610810945943153e-10, 3.326837635412079e-10, 2.391109594174158e-10, 1.711818763492609e-10, 1.220624353199593e-10, 8.668540483694429e-11, 6.130853923993658e-11, 4.317923539664254e-11, 3.028106244543507e-11, 
2.114296210772206e-11, 1.46962208742323e-11, 1.016770372606342e-11, 7.000495038094913e-12, 4.795165039051853e-12, 3.266490546572947e-12, 2.211704174907164e-12, 1.487305126598777e-12, 9.921995376952557e-13, 6.554991711461721e-13, 4.277314275812491e-13, 2.745442681446119e-13, 1.721892442731083e-13, 1.046364512878425e-13, 5.962788769869437e-14, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "ae_wfc": [ + { + "index": 0, + "radial_function": [ + 0.001252792496979783, 0.001268516451977268, 0.00128443776029652, 0.001300558898943711, 
0.001316882179112816, 0.001333409718978639, 0.001350144181668446, 0.00136708813421473, 0.001384244196978891, 0.001401615022955945, 0.001419203298143376, 0.001437011741951382, 0.001455043107615519, 0.001473300182614219, 0.001491785789091379, 0.001510502784284059, 0.001529454060955359, 0.00154864254783252, 0.001568071210050331, 0.001587743049599877, 0.001607661105782716, 0.001627828455670532, 0.001648248214570327, 0.001668923536495229, 0.001689857614640961, 0.001711053681868058, 0.001732515011189881, 0.001754244916266497, 0.001776246751904508, 0.001798523914562875, 0.001821079842864819, 0.001843918018115864, 0.001867041964828099, 0.001890455251250715, 0.001914161489906906, 0.001938164338137192, 0.001962467498649248, 0.001987074720074303, 0.002011989797530189, 0.002037216573191117, 0.002062758936864249, 0.002088620826573149, 0.002114806229148186, 0.00214131918082397, 0.002168163767843896, 0.002195344127071886, 0.002222864446611391, 0.002250728966431749, 0.00227894197900198, 0.002307507829932092, 0.002336430918621991, 0.002365715698918064, 0.002395366679777551, 0.002425388425940747, 0.002455785558611159, 0.002486562756143692, 0.002517724754740941, 0.002549276349157696, 0.002581222393413741, 0.00261356780151503, 0.002646317548183346, 0.00267947666959453, 0.002713050264125358, 0.00274704349310919, 0.002781461581600452, 0.002816309819148071, 0.002851593560577942, 0.002887318226784541, 0.002923489305531764, 0.002960112352263098, 0.002997192990921233, 0.003034736914777189, 0.003072749887269093, 0.003111237742850673, 0.0031502063878496, 0.003189661801335761, 0.00322961003599958, 0.003270057219040482, 0.003311009553065608, 0.003352473316998901, 0.00339445486700064, 0.003436960637397559, 0.003479997141623648, 0.003523570973171737, 0.003567688806555993, 0.003612357398285417, 0.003657583587848471, 0.003703374298708941, 0.003749736539313145, 0.003796677404108607, 0.003844204074574301, 0.003892323820262594, 0.00394104399985299, 0.003990372062217807, 0.004040315547499878, 
0.004090882088202425, 0.00414207941029121, 0.004193915334309067, 0.004246397776502964, 0.004299534749963699, 0.004353334365778345, 0.004407804834195573, 0.004462954465803986, 0.004518791672723567, 0.004575324969810366, 0.004632562975874566, 0.004690514414912043, 0.004749188117349533, 0.00480859302130355, 0.004868738173853165, 0.004929632732326783, 0.004991285965603041, 0.005053707255425941, 0.005116906097734372, 0.005180892104006117, 0.005245675002616496, 0.005311264640211756, 0.005377670983097356, 0.005444904118641249, 0.005512974256692308, 0.005581891731014025, 0.005651667000733594, 0.005722310651806523, 0.005793833398496906, 0.005866246084873463, 0.005939559686321485, 0.006013785311070842, 0.006088934201740125, 0.006165017736897096, 0.006242047432635564, 0.006320034944168788, 0.006398992067439576, 0.006478930740747174, 0.006559863046391078, 0.006641801212331914, 0.006724757613869472, 0.006808744775338062, 0.006893775371819289, 0.006979862230872369, 0.007067018334282122, 0.007155256819824753, 0.007244590983051548, 0.007335034279090597, 0.007426600324466662, 0.007519302898939323, 0.007613155947359483, 0.007708173581544394, 0.007804370082171271, 0.00790175990068963, 0.008000357661252459, 0.008100178162666323, 0.00820123638036051, 0.008303547468375328, 0.008407126761369649, 0.008511989776647794, 0.00861815221620589, 0.008725629968797733, 0.008834439112020328, 0.008944595914419115, 0.009056116837613051, 0.009169018538439556, 0.009283317871119457, 0.009399031889441998, 0.009516177848969977, 0.009634773209265098, 0.009754835636133594, 0.009876383003892212, 0.009999433397654581, 0.01012400511563806, 0.01025011667149109, 0.01037778679664114, 0.01050703444266321, 0.01063787878366905, 0.01077033921871703, 0.01090443537424272, 0.01104018710651023, 0.0111776145040843, 0.01131673789032321, 0.01145757782589239, 0.01160015511129889, 0.01174449078944662, 0.01189060614821233, 0.01203852272304239, 0.01218826229957026, 0.01233984691625473, 0.01249329886703869, 0.01264864070402872, 
0.01280589524019502, 0.01296508555209201, 0.01312623498259927, 0.01328936714368287, 0.01345450591917693, 0.01362167546758537, 0.01379090022490371, 0.01396220490746078, 0.01413561451478029, 0.014311154332462, 0.01448884993508249, 0.01466872718911524, 0.01485081225586986, 0.01503513159445044, 0.01522171196473253, 0.01541058043035892, 0.01560176436175355, 0.01579529143915379, 0.0159911896556604, 0.01618948732030524, 0.01639021306113619, 0.01659339582831925, 0.01679906489725722, 0.01700724987172482, 0.01721798068701991, 0.01743128761313032, 0.01764720125791601, 0.01786575257030611, 0.01808697284351048, 0.01831089371824525, 0.01853754718597202, 0.01876696559215012, 0.01899918163950142, 0.01923422839128737, 0.01947213927459741, 0.01971294808364843, 0.01995668898309463, 0.02020339651134706, 0.02045310558390234, 0.02070585149667979, 0.02096166992936635, 0.02122059694876847, 0.02148266901217036, 0.02174792297069768, 0.02201639607268594, 0.02228812596705278, 0.02256315070667319, 0.02284150875175685, 0.0231232389732266, 0.02340838065609709, 0.02369697350285255, 0.02398905763682282, 0.02428467360555627, 0.0245838623841888, 0.02488666537880753, 0.02519312442980812, 0.02550328181524449, 0.02581718025416951, 0.02613486290996559, 0.02645637339366355, 0.02678175576724861, 0.02711105454695176, 0.02744431470652535, 0.02778158168050101, 0.02812290136742859, 0.02846832013309419, 0.0288178848137159, 0.02917164271911516, 0.0295296416358622, 0.02989192983039345, 0.03025855605209926, 0.03062956953637973, 0.03100502000766662, 0.0313849576824094, 0.03176943327202308, 0.03215849798579564, 0.03255220353375287, 0.03295060212947799, 0.0333537464928839, 0.03376168985293533, 0.03417448595031841, 0.03459218904005493, 0.03501485389405861, 0.03544253580363051, 0.03587529058189073, 0.03631317456614332, 0.03675624462017146, 0.03720455813645967, 0.03765817303833979, 0.03811714778205754, 0.03858154135875597, 0.03905141329637259, 0.03952682366144628, 0.04000783306083051, 0.04049450264330887, 
0.040986894101109, 0.04148506967131108, 0.04198909213714649, 0.04249902482918259, 0.04301493162638912, 0.0435368769570819, 0.04406492579973904, 0.04459914368368521, 0.04513959668963879, 0.04568635145011735, 0.04623947514969597, 0.04679903552511348, 0.04736510086522112, 0.04793774001076818, 0.04851702235401895, 0.04910301783819528, 0.04969579695673872, 0.05029543075238638, 0.05090199081605402, 0.05151554928552027, 0.05213617884390521, 0.05276395271793694, 0.05339894467599885, 0.05404122902595108, 0.05469088061271855, 0.05534797481563856, 0.05601258754556004, 0.05668479524168721, 0.05736467486815927, 0.05805230391035847, 0.05874776037093796, 0.05945112276556118, 0.06016247011834395, 0.06088188195699062, 0.06160943830761498, 0.06234521968923685, 0.06308930710794475, 0.06384178205071503, 0.06460272647887749, 0.06537222282121748, 0.06615035396670382, 0.06693720325683244, 0.06773285447757442, 0.06853739185091766, 0.06935090002599092, 0.07017346406975841, 0.07100516945727348, 0.07184610206147911, 0.07269634814254311, 0.07355599433671524, 0.07442512764469375, 0.07530383541948796, 0.0761922053537636, 0.07709032546665762, 0.0779982840900479, 0.07891616985426439, 0.07984407167322678, 0.080782078728994, 0.08173028045571061, 0.08268876652293486, 0.08365762681833229, 0.08463695142971955, 0.08562683062644182, 0.08662735484006731, 0.08763861464438216, 0.08866070073466832, 0.0896937039062471, 0.09073771503227059, 0.09179282504074257, 0.09285912489075084, 0.09393670554789181, 0.09502565795886844, 0.09612607302524215, 0.09723804157631863, 0.0983616543411477, 0.09949700191961672, 0.1006441747526167, 0.10180326309126, 0.1029743569651279, 0.104157546149527, 0.1053529201317313, 0.106560568076188, 0.1077805787886644, 0.1090130406793118, 0.1102580417246244, 0.1115156694282674, 0.1127860107807518, 0.1140691522179308, 0.1153651795782916, 0.1166741780590205, 0.1179962321708117, 0.1193314256913977, 0.1206798416177727, 0.1220415621170829, 0.123416668476158, 0.1248052410496551, 
0.126207359206789, 0.12762310127662, 0.1290525444918714, 0.1304957649312496, 0.1319528374602361, 0.1334238356703241, 0.1349088318166695, 0.1364078967541282, 0.1379210998716484, 0.1394485090249893, 0.1409901904677353, 0.1425462087805763, 0.1441166267988222, 0.1457015055381225, 0.1473009041183589, 0.1489148796856807, 0.150543487332651, 0.1521867800164741, 0.1538448084752705, 0.1555176211423697, 0.1572052640585899, 0.1589077807824704, 0.1606252122984285, 0.162357596922807, 0.1641049702077824, 0.165867364843101, 0.1676448105556139, 0.1694373340065765, 0.1712449586866854, 0.1730677048088182, 0.1749055891984488, 0.1767586251817061, 0.178626822471046, 0.1805101870485094, 0.1824087210465336, 0.1843224226262908, 0.1862512858535249, 0.1881953005718583, 0.1901544522735415, 0.1921287219676189, 0.1941180860454861, 0.1961225161438112, 0.1981419790047974, 0.2001764363337633, 0.2022258446540165, 0.204290155159001, 0.2063693135616964, 0.2084632599412488, 0.2105719285868164, 0.212695247838612, 0.2148331399261252, 0.2169855208035118, 0.2191522999821361, 0.2213333803602548, 0.2235286580498328, 0.2257380222004822, 0.2279613548205192, 0.2301985305951333, 0.2324494167016664, 0.2347138726220028, 0.2369917499520703, 0.239282892208457, 0.2415871346321507, 0.2439043039894099, 0.2462342183697774, 0.2485766869812514, 0.2509315099426322, 0.2532984780730623, 0.2556773726787861, 0.2580679653371544, 0.2604700176779027, 0.2628832811617394, 0.2653074968562779, 0.2677423952093562, 0.2701876958197865, 0.2726431072055857, 0.2751083265697378, 0.2775830395635474, 0.2800669200476454, 0.2825596298507129, 0.2850608185259966, 0.2875701231056896, 0.2900871678532614, 0.2926115640138233, 0.2951429095626205, 0.2976807889517509, 0.3002247728552134, 0.3027744179123952, 0.3053292664701161, 0.3078888463233498, 0.3104526704547538, 0.3130202367731423, 0.3155910278510446, 0.318164510661501, 0.3207401363142506, 0.3233173397914782, 0.3258955396832913, 0.3284741379231084, 0.3310525195231461, 0.3336300523102032, 
0.336206086661945, 0.3387799552439036, 0.341350972747416, 0.3439184356287307, 0.3464816218495267, 0.3490397906190915, 0.3515921821384216, 0.3541380173465135, 0.3566764976691244, 0.3592068047702944, 0.3617281003069295, 0.3642395256867559, 0.3667402018299672, 0.3692292289348981, 0.371705686248065, 0.3741686318389321, 0.3766171023797644, 0.3790501129309484, 0.3814666567321668, 0.3838657049998296, 0.386246206731172, 0.3886070885154458, 0.3909472543526382, 0.3932655854801679, 0.395560940208019, 0.3978321537627855, 0.4000780381411118, 0.4022973819730263, 0.4044889503956798, 0.4066514849380105, 0.4087837034168688, 0.4108842998451539, 0.4129519443525175, 0.4149852831192099, 0.4169829383236522, 0.4189435081043315, 0.4208655665366287, 0.4227476636251996, 0.4245883253125419, 0.4263860535043953, 0.428139326112627, 0.4298465971162738, 0.4315062966414193, 0.4331168310605928, 0.4346765831123948, 0.4361839120420552, 0.4376371537636483, 0.439034621044692, 0.4403746037138724, 0.4416553688926386, 0.4428751612514281, 0.4440322032912828, 0.4451246956516282, 0.4461508174449928, 0.4471087266194526, 0.4479965603495863, 0.448812435456739, 0.4495544488593869, 0.4502206780544069, 0.4508091816300516, 0.4513179998114337, 0.4517451550393241, 0.4520886525830671, 0.452346481188414, 0.4525166137610737, 0.4525970080867768, 0.4525856075886397, 0.4524803421226156, 0.452279128811804, 0.451979872920388, 0.4515804687679518, 0.4510788006849229, 0.4504727440098678, 0.4497601661293537, 0.4489389275610733, 0.4480068830809079, 0.4469618828945851, 0.4458017738545629, 0.4445244007227477, 0.4431276074796243, 0.4416092386803478, 0.4399671408583169, 0.438199163976707, 0.4363031629284143, 0.4342769990848122, 0.4321185418936868, 0.4298256705266678, 0.4273962755764266, 0.4248282608038591, 0.4221195449354194, 0.4192680635107106, 0.4162717707803803, 0.4131286416543032, 0.4098366736999657, 0.4063938891908976, 0.4027983372049189, 0.3990480957718959, 0.3951412740706129, 0.3910760146742852, 0.3868504958441471, 
0.3824629338704544, 0.3779115854601461, 0.3731947501703046, 0.3683107728864529, 0.3632580463446138, 0.3580350136959458, 0.3526401711126503, 0.347072070433727, 0.3413293218490256, 0.3354105966199169, 0.3293146298347703, 0.323040223197293, 0.3165862478456435, 0.3099516472000913, 0.3031354398368509, 0.2961367223855672, 0.2889546724477824, 0.2815885515335611, 0.2740377080132957, 0.2663015800815605, 0.2583796987297259, 0.2502716907238894, 0.2419772815845217, 0.2334962985640728, 0.2248286736186286, 0.2159744463695561, 0.2069337670509261, 0.1977068994383579, 0.1882942237547862, 0.1786962395485161, 0.1689135685388007, 0.1589469574240508, 0.1487972806476694, 0.138465543116397, 0.1279528828659523, 0.1172605736686636, 0.1063900275777094, 0.0953427974025167, 0.08412057910981212, 0.07272521414477695, 0.06115869166672776, 0.04942315069372718, 0.03752088215052599, 0.02545433081424728, 0.01322609715224554, 0.0008389390466081267, -0.01170422660018733, -0.02440032238389051, -0.03724610904673676, -0.05023818426064031, -0.06337298155053987, -0.07664676936303441, -0.09005565028536779, -0.1035955604197438, -0.1172622689178823, -0.1310513776806634, -0.1449583212276577, -0.1589783667413013, -0.173106614290455, -0.1873379972380877, -0.201667282837841, -0.2160890730242749, -0.2305978054016538, -0.2451877544362191, -0.2598530328569897, -0.2745875932702522, -0.2893852299930237, -0.3042395811109075, -0.3191441307658805, -0.334092211679681, -0.3490770079185562, -0.3640915579052038, -0.3791287576837784, -0.3941813644438145, -0.4092420003088535, -0.4243031563954222, -0.439357197147804, -0.4543963649537532, -0.4694127850459301, -0.4843984706933735, -0.49934532868677, -0.5142451651206428, -0.5290896914748476, -0.5438705309969464, -0.5585792253861394, -0.5732072417784576, -0.587745980031893, -0.6021867803090383, -0.6165209309536751, -0.6307396766565625, -0.644834226904464, -0.6587957647052192, -0.6726154555804188, -0.6862844568159915, -0.6997939269597694, -0.713135035553864, -0.726298973088476, 
-0.7392769611625791, -0.7520602628357677, -0.764640193154446, -0.7770081298344763, -0.7891555240813861, -0.8010739115282716, -0.8127549232706415, -0.8241902969765998, -0.8353718880500008, -0.8462916808235073, -0.8569417997578582, -0.8673145206231007, -0.8774022816370723, -0.8871976945360331, -0.8966935555520381, -0.9058828562714335, -0.9147587943487198, -0.9233147840499941, -0.9315444666002345, -0.9394417203088247, -0.9470006704479667, -0.954215698858941, -0.9610814532616063, -0.9675928562430295, -0.9737451139017465, -0.9795337241248344, -0.9849544844757563, -0.9900034996717921, -0.9946771886308138, -0.9989722910681743, -1.002885873625574, -1.006415335514922, -1.009558413661449, -1.012313187331581, -1.014678082232475, -1.016651874071444, -1.018233691564988, -1.019423018888575, -1.020219697559842, -1.020623927749398, -1.020636269014959, -1.020257640456098, -1.019489320288441, -1.01833294483772, -1.016790506955599, -1.014864353860768, -1.012557184410262, -1.009872045807467, -1.006812329754706, -1.003381768059705, -0.9995844277065885, -0.9954247054033475, -0.9909073216189985, -0.9860373141247893, -0.9808200310549599, -0.9752611235035945, -0.9693665376750831, -0.9631425066065962, -0.95659554148181, -0.9497324225558429, -0.942560189712029, -0.9350861326717287, -0.9273177808788594, -0.9192628930812505, -0.9109294466312411, -0.902325626528194, -0.8934598142257615, -0.8843405762268292, -0.8749766524890796, -0.8653769446640611, -0.8555505041925178, -0.845506520278552, -0.835254307765175, -0.8248032949316007, -0.814163009372204, -0.8033430597464551, -0.792353125197869, -0.7812029531838939, -0.7699023416949285, -0.7584611288463743, -0.7468891804780589, -0.7351963781649874, -0.7233926072891917, -0.7114877452321724, -0.6994916497007433, -0.6874141472016044, -0.6752650216793242, -0.6630540033317784, -0.6507907576164551, -0.6384848744604239, -0.6261458576861592, -0.6137831146648203, -0.6014059462080241, -0.5890235367085978, -0.5766449445402697, -0.5642790927257598, 
-0.5519347598822416, -0.539620571452702, -0.527344991231283, -0.5151163131902834, -0.502942653616112, -0.4908319435611176, -0.4787919216178527, -0.4668301270220611, -0.4549538930902748, -0.4431703409976512, -0.4314863739014124, -0.4199086714149433, -0.4084436844373249, -0.3970976303428134, -0.3858764885345013, -0.3747859963661168, -0.3638316454356407, -0.3530186782541189, -0.3423520852927535, -0.3318366024110249, -0.3214767086682693, -0.3112766245207706, -0.3012403104060475, -0.2913714657156121, -0.2816735281570417, -0.2721496735057441, -0.2628028157463104, -0.2536356076028238, -0.2446504414569433, -0.2358494506519983, -0.2272345111807139, -0.218807243753543, -0.2105690162439051, -0.2025209465059291, -0.1946639055595672, -0.1869985211371892, -0.1795251815849956, -0.1722440401117836, -0.1651550193767937, -0.1582578164075369, -0.1515519078376674, -0.1450365554541306, -0.1387108120419742, -0.1325735275143796, -0.1266233553146441, -0.1208587590760343, -0.115278019524645, -0.1098792416096259, -0.104660361844411, -0.0996191558418796, -0.09475324602572505, -0.09006010949969043, -0.08553708605577222, -0.08118138630198585, -0.07699009988984339, -0.07296020382131337, -0.0690885708147223, -0.06537197770881993, -0.061807113884068, -0.05839058968013044, -0.05511894478854148, -0.05198865659961041, -0.04899614848278937, -0.04613779797998339, -0.04340994489162108, -0.04080889923572972, -0.0383309490607684, -0.03597236809356671, -0.03372942320439187, -0.03159838167192185, -0.02957551823173176, -0.02765712189280381, -0.02583950250754074, -0.02411899708179608, -0.02249197581252554, -0.02095484784180638, -0.01950406671716069, -0.01813613554934628, -0.01684761186003928, -0.01563511211311852, -0.01449531592456554, -0.01342496994730835, -0.01242089142865453, -0.01147997143927182, -0.01059917777397489, -0.00977555752585762, -0.009006239336563658, -0.008288435326707434, -0.00761944271163588, -0.00699664510885134, -0.006417513544492758, -0.005879607167288864, -0.005380573679348868, 
-0.004918149494038228, -0.00449015963199505, -0.004094517367073174, -0.003729223634647816, -0.003392366215286457, -0.003082118707269776, -0.002796739301843662, -0.002534569375393368, -0.002294031912954813, -0.002073629777616881, -0.001871943840423704, -0.001687630985359436, -0.001519422003892729, -0.001366119393376954, -0.001226595073349166, -0.001099788033449769, -0.0009847019263005425, -0.0008804026182359821, -0.0007860157102869857, -0.0007007240412722902, -0.0006237651842673734, -0.0005544289470985554, -0.0004920548868576654, -0.0004360298477557288, -0.0003857855309385741, -0.0003407961041787968, -0.0003005758586427772, -0.0002646769192138927, -0.0002326870141388746, -0.0002042273090584041, -0.0001789503097901307, -0.0001565378375566435, -0.0001366990796964564, -0.000119168718266327, -0.0001037051383413707, -9.00887172481754e-05, -7.812019542779851e-05, -6.761912912201281e-05, -5.842242460895296e-05, -5.038295328445776e-05, -4.336824649358246e-05, -3.725926866325817e-05, -3.194926697183306e-05, -2.734269551383481e-05, -2.335421167802075e-05, -1.990774225262833e-05, -1.693561660241775e-05, -1.437776412625567e-05, -1.218097309920702e-05, -1.029820793018665e-05, -8.687981816674794e-06, -7.313781759850268e-06, -6.143542905433543e-06, -5.149169198587179e-06, -4.306097384719733e-06, -3.592901449294358e-06, -2.990934653795811e-06, -2.484006526499038e-06, -2.058091171240737e-06, -1.701071112250602e-06, -1.402484988303688e-06, -1.153359040157857e-06, -9.459948844362916e-07, -7.73785825292214e-07, -6.310838740916452e-07, -5.130749032480125e-07, -4.156632415238565e-07, -3.353698206170322e-07, -2.692424049643669e-07, -2.147758159144143e-07, -1.698443105045081e-07, -1.326278093295039e-07, -1.017341274336734e-07, -7.505454182638256e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 1, + "radial_function": [ + 0.0009004316260845162, 0.000911733055811305, 0.0009231763311931879, 0.000934763232552686, 0.0009464954210281464, 0.0009583744190178056, 0.0009704021406205879, 0.0009825804308628524, 0.0009949111731014245, 0.001007396274148181, 0.001020037664535151, 0.00103283729880938, 0.001045797155829511, 0.001058919239065869, 0.001072205576904193, 0.001085658222953041, 0.001099279256354907, 0.001113070782101111, 0.001127034931350484, 0.001141173861751904, 0.00115548975777073, 0.001169984831019168, 0.001184661320590628, 0.001199521493398104, 0.001214567644516638, 0.001229802097529899, 0.00124522720488094, 0.001260845348227173, 0.001276658938799607, 0.001292670417766406, 0.001308882256600808, 0.001325296957453468, 0.001341917053529257, 0.001358745109468584, 0.001375783721733281, 0.001393035518997116, 0.001410503162540969, 0.001428189346652736, 0.001446096799032021, 0.001464228281199644, 0.001482586588912051, 0.001501174552580658, 0.001519995037696199, 0.001539050945258119, 0.001558345212209087, 0.001577880811874672, 0.001597660754408244, 0.001617688087241162, 0.001637965895538303, 0.00165849730265899, 0.001679285470623388, 0.001700333600584417, 0.001721644933305249, 0.001743222749642456, 0.001765070371034856, 0.001787191159998136, 0.001809588520625303, 0.001832265899093036, 0.001855226784173998, 
0.001878474707755172, 0.001902013245362293, 0.001925846016690434, 0.001949976686140817, 0.001974408963363921, 0.001999146603808942, 0.002024193409279686, 0.002049553228496957, 0.002075229957667513, 0.002101227541059656, 0.002127549971585534, 0.002154201291390214, 0.002181185592447612, 0.002208507017163346, 0.002236169758984574, 0.002264178063016918, 0.002292536226648517, 0.002321248600181302, 0.002350319587469569, 0.002379753646565914, 0.002409555290374615, 0.002439729087312539, 0.002470279661977642, 0.002501211695825156, 0.002532529927851523, 0.002564239155286162, 0.002596344234291164, 0.002628850080668968, 0.002661761670578117, 0.002695084041257167, 0.002728822291756847, 0.002762981583680516, 0.002797567141933045, 0.002832584255478173, 0.002868038278104434, 0.002903934629199743, 0.00294027879453471, 0.00297707632705479, 0.003014332847681332, 0.003052054046121627, 0.003090245681688036, 0.003128913584126288, 0.003168063654453033, 0.003207701865802734, 0.003247834264283993, 0.0032884669698454, 0.00332960617715098, 0.003371258156465349, 0.003413429254548649, 0.003456125895561359, 0.003499354581979081, 0.00354312189551737, 0.003587434498066721, 0.003632299132637795, 0.003677722624316964, 0.003723711881232289, 0.003770273895530005, 0.003817415744361605, 0.003865144590881632, 0.003913467685256247, 0.003962392365682682, 0.004011926059419663, 0.004062076283828906, 0.004112850647427753, 0.004164256850953072, 0.004216302688436487, 0.004268996048291055, 0.004322344914409448, 0.00437635736727377, 0.004431041585077075, 0.00448640584485669, 0.004542458523639418, 0.004599208099598742, 0.00465666315322409, 0.004714832368502267, 0.004773724534111149, 0.004833348544625705, 0.004893713401736473, 0.004954828215480544, 0.005016702205485157, 0.005079344702224007, 0.005142765148286315, 0.005206973099658792, 0.005271978227020541, 0.00533779031705102, 0.005404419273751114, 0.005471875119777423, 0.005540167997789838, 0.0056093081718125, 0.005679306028608193, 0.005750172079066277, 
0.00582191695960423, 0.005894551433582869, 0.005968086392735336, 0.006042532858609908, 0.006117901984026719, 0.006194205054548452, 0.00627145348996508, 0.006349658845792699, 0.006428832814786563, 0.006508987228468335, 0.006590134058667649, 0.006672285419078019, 0.006755453566827174, 0.006839650904061858, 0.006924889979547147, 0.007011183490280341, 0.007098544283119473, 0.007186985356426476, 0.007276519861725058, 0.007367161105373314, 0.007458922550251118, 0.00755181781746232, 0.007645860688051781, 0.007741065104737273, 0.00783744517365626, 0.00793501516612758, 0.008033789520428053, 0.008133782843583992, 0.008235009913177676, 0.008337485679168734, 0.008441225265730464, 0.008546243973101084, 0.008652557279449878, 0.008760180842758248, 0.008869130502715627, 0.008979422282630243, 0.009091072391354676, 0.009204097225226207, 0.009318513370021863, 0.009434337602928168, 0.009551586894525479, 0.00967027841078688, 0.00979042951509157, 0.009912057770252647, 0.01003518094055919, 0.01015981699383258, 0.01028598410349699, 0.01041370065066376, 0.01054298522622982, 0.01067385663298983, 0.0108063338877619, 0.0109404362235269, 0.01107618309158105, 0.01121359416370176, 0.01135268933432639, 0.01149348872274392, 0.01163601267529925, 0.01178028176760986, 0.01192631680679479, 0.01207413883371558, 0.01222376912522891, 0.01237522919645087, 0.01252854080303233, 0.01268372594344541, 0.01284080686128056, 0.01299980604755403, 0.01316074624302544, 0.01332365044052502, 0.01348854188729031, 0.01365544408731182, 0.01382438080368741, 0.01399537606098483, 0.01416845414761222, 0.01434363961819596, 0.01452095729596546, 0.01470043227514461, 0.01488208992334907, 0.01506595588398923, 0.01525205607867814, 0.01544041670964387, 0.01563106426214584, 0.01582402550689441, 0.0160193275024733, 0.01621699759776392, 0.01641706343437133, 0.01661955294905086, 0.01682449437613483, 0.01703191624995858, 0.01724184740728517, 0.0174543169897278, 0.01766935444616932, 0.0178869895351779, 0.018107252327418, 
0.01833017320805579, 0.01855578287915812, 0.01878411236208396, 0.01901519299986753, 0.0192490564595919, 0.01948573473475218, 0.01972526014760712, 0.01996766535151797, 0.02021298333327363, 0.02046124741540066, 0.02071249125845707, 0.02096674886330861, 0.02122405457338621, 0.02148444307692325, 0.02174794940917126, 0.02201460895459263, 0.02228445744902885, 0.02255753098184272, 0.02283386599803307, 0.02311349930032021, 0.02339646805120064, 0.02368280977496914, 0.02397256235970658, 0.02426576405923159, 0.02456245349501425, 0.02486266965804987, 0.02516645191069084, 0.0254738399884346, 0.02578487400166556, 0.02609959443734889, 0.02641804216067386, 0.02674025841664458, 0.02706628483161571, 0.02739616341477079, 0.02772993655954067, 0.02806764704495952, 0.02840933803695589, 0.02875505308957591, 0.02910483614613623, 0.02945873154030349, 0.02981678399709771, 0.03017903863381644, 0.03054554096087671, 0.03091633688257156, 0.03129147269773798, 0.03167099510033292, 0.03205495117991396, 0.03244338842202115, 0.03283635470845651, 0.03323389831745741, 0.03363606792376011, 0.03404291259854966, 0.03445448180929208, 0.03487082541944484, 0.03529199368804149, 0.03571803726914601, 0.03614900721117281, 0.03658495495606756, 0.03702593233834445, 0.03747199158397515, 0.03792318530912456, 0.03837956651872855, 0.03884118860490839, 0.03930810534521702, 0.03978037090071147, 0.04025803981384633, 0.04074116700618242, 0.04122980777590521, 0.04172401779514689, 0.04222385310710645, 0.04272937012296119, 0.04324062561856395, 0.04375767673091926, 0.04428058095443192, 0.04480939613692153, 0.04534418047539589, 0.0458849925115763, 0.04643189112716777, 0.04698493553886655, 0.04754418529309771, 0.04810970026047504, 0.04868154062997539, 0.04925976690281959, 0.04984443988605156, 0.05043562068580762, 0.05103337070026708, 0.05163775161227568, 0.05224882538163288, 0.05286665423703393, 0.05349130066765744, 0.05412282741438912, 0.05476129746067172, 0.05540677402297169, 0.05605932054085224, 0.05671900066664259, 
0.05738587825469307, 0.05806001735020519, 0.05874148217762595, 0.05943033712859518, 0.06012664674943471, 0.06083047572816765, 0.06154188888105624, 0.06226095113864599, 0.06298772753130431, 0.06372228317424074, 0.06446468325199636, 0.0652149930023896, 0.06597327769990481, 0.06673960263851092, 0.06751403311389584, 0.06829663440510336, 0.06908747175555813, 0.06988661035346426, 0.07069411531156336, 0.07151005164623679, 0.07233448425593735, 0.07316747789893471, 0.07400909717035942, 0.07485940647852936, 0.0757184700205426, 0.07658635175712061, 0.0774631153866847, 0.07834882431864938, 0.0792435416459153, 0.08014733011654437, 0.08106025210459974, 0.0819823695801324, 0.08291374407829682, 0.0838544366675767, 0.0848045079171029, 0.08576401786304409, 0.08673302597405164, 0.08771159111573897, 0.08869977151417616, 0.08969762471837985, 0.0907052075617785, 0.09172257612263297, 0.09274978568339155, 0.09378689068895925, 0.0948339447038602, 0.09589100036827213, 0.09695810935291198, 0.0980353223127505, 0.09912268883953496, 0.1002202574130978, 0.1013280753514289, 0.1024461887594903, 0.1035746424767499, 0.1047134800234128, 0.1058627435453265, 0.1070224737575389, 0.1081927098864848, 0.1093734896107789, 0.1105648490005918, 0.111766822455587, 0.1129794426413943, 0.1142027404245982, 0.1154367448062165, 0.1166814828536476, 0.1179369796310623, 0.1192032581282174, 0.1204803391876683, 0.1217682414303574, 0.1230669811795557, 0.1243765723831352, 0.1256970265341483, 0.1270283525896942, 0.1283705568880475, 0.1297236430640291, 0.1310876119625972, 0.1324624615506357, 0.1338481868269224, 0.1352447797302515, 0.1366522290456949, 0.1380705203089788, 0.1394996357089591, 0.140939553988174, 0.1423902503414579, 0.1438516963125969, 0.1453238596890109, 0.1468067043944432, 0.1483001903796445, 0.1498042735110343, 0.1513189054573277, 0.1528440335741115, 0.1543796007863603, 0.1559255454688791, 0.157481801324663, 0.1590482972611648, 0.160624957264462, 0.1622117002713171, 0.1638084400391246, 0.1654150850137415, 
0.1670315381951981, 0.1686576970012879, 0.1702934531290366, 0.1719386924140529, 0.1735932946877632, 0.1752571336325384, 0.1769300766347163, 0.1786119846355327, 0.18030271197997, 0.1820021062635379, 0.1837100081770016, 0.1854262513490751, 0.1871506621871013, 0.1888830597157406, 0.190623255413694, 0.1923710530484894, 0.1941262485093613, 0.1958886296382592, 0.1976579760590207, 0.1994340590047499, 0.2012166411434448, 0.2030054764019204, 0.2048003097880776, 0.2066008772115728, 0.2084069053029443, 0.2102181112312586, 0.2120342025203422, 0.2138548768636666, 0.215679821937963, 0.2175087152156432, 0.2193412237761104, 0.221177004116047, 0.2230157019587718, 0.2248569520627633, 0.2267003780294522, 0.22854559211039, 0.2303921950139059, 0.2322397757113714, 0.2340879112431963, 0.2359361665246839, 0.2377840941518843, 0.2396312342075843, 0.241477114067584, 0.2433212482074129, 0.2451631380096467, 0.2470022715719931, 0.2488381235163194, 0.2506701547988034, 0.2524978125213974, 0.2543205297447981, 0.2561377253031278, 0.2579488036205362, 0.2597531545299406, 0.2615501530941306, 0.2633391594294726, 0.265119518532453, 0.2668905601093137, 0.2686515984090351, 0.2704019320599361, 0.2721408439101655, 0.2738676008723682, 0.2755814537728209, 0.2772816372053382, 0.2789673693902595, 0.2806378520388383, 0.2822922702233619, 0.2839297922533395, 0.285549569558109, 0.2871507365762164, 0.2887324106519378, 0.2902936919393181, 0.291833663314113, 0.2933513902940301, 0.2948459209676739, 0.2963162859326098, 0.2977614982429712, 0.2991805533670432, 0.3005724291552688, 0.3019360858191282, 0.3032704659213558, 0.3045744943779664, 0.3058470784725734, 0.3070871078834876, 0.3082934547241007, 0.3094649735970583, 0.310600501662744, 0.3116988587226009, 0.3127588473178243, 0.3137792528439719, 0.314758843682042, 0.3156963713465804, 0.3165905706513841, 0.317440159893375, 0.3182438410552286, 0.3190003000273433, 0.3197082068497477, 0.320366215974546, 0.320972966549508, 0.3215270827234154, 0.3220271739737809, 
0.3224718354575593, 0.3228596483854758, 0.3231891804205962, 0.3234589861017701, 0.323667607292575, 0.3238135736563954, 0.3238954031582664, 0.3239116025941136, 0.3238606681480193, 0.3237410859781413, 0.3235513328319089, 0.3232898766911161, 0.3229551774475266, 0.3225456876095979, 0.3220598530409279, 0.3214961137310139, 0.3208529045989087, 0.3201286563303439, 0.319321796248881, 0.3184307492216348, 0.3174539386000997, 0.3163897871965918, 0.3152367182968026, 0.3139931567089381, 0.3126575298498971, 0.3112282688689183, 0.3097038098090995, 0.3080825948071663, 0.306363073331837, 0.3045437034610997, 0.3026229531986835, 0.3005993018299702, 0.2984712413175541, 0.2962372777366183, 0.2938959327502499, 0.2914457451247763, 0.2888852722851504, 0.2862130919103663, 0.2834278035688324, 0.2805280303935722, 0.2775124207970636, 0.2743796502254678, 0.2711284229519317, 0.267757473908581, 0.2642655705567508, 0.2606515147949262, 0.2569141449037894, 0.2530523375276883, 0.2490650096917618, 0.2449511208538684, 0.2407096749903769, 0.2363397227147883, 0.2318403634280599, 0.2272107474994089, 0.2224500784762704, 0.2175576153219846, 0.2125326746796807, 0.2073746331607202, 0.2020829296559522, 0.1966570676679228, 0.1910966176620675, 0.1854012194348038, 0.1795705844963242, 0.1736044984657756, 0.1675028234763963, 0.161265500588064, 0.1548925522045952, 0.1483840844930223, 0.1417402898019598, 0.1349614490760627, 0.1280479342634721, 0.1210002107130348, 0.113818839557986, 0.1065044800826834, 0.09905789206889104, 0.09147993811802095, 0.08377158594566216, 0.07593391064464886, 0.06796809691285303, 0.05987544124182551, 0.0516573540623563, 0.04331536184298083, 0.03485110913742175, 0.02626636057692851, 0.0175630028034574, 0.008743046339623861, -0.0001913726086438759, -0.009237990420816208, -0.01839441440589313, -0.02765812122599168, -0.03702645540340409, -0.04649662791511473, -0.05606571487873536, -0.06573065633379005, -0.07548825512225332, -0.0853351758722223, -0.09526794408858524, -0.1052829453545394, 
-0.1153764246478113, -0.1255444857754433, -0.1357830909310398, -0.146088060378402, -0.1564550722655439, -0.1668796625731511, -0.1773572252016369, -0.1878830122010572, -0.1984521341482644, -0.2090595606758102, -0.2197001211572517, -0.2303685055536486, -0.2410592654261888, -0.2517668151200025, -0.2624854331243497, -0.2732092636144559, -0.2839323181803391, -0.2946484777480037, -0.3053514946983572, -0.3160349951891503, -0.3266924816851157, -0.3373173357013037, -0.3479028207643727, -0.3584420855962729, -0.3689281675243944, -0.3793539961217939, -0.3897123970806064, -0.3999960963211703, -0.4101977243387536, -0.4203098207890776, -0.4303248393130921, -0.4402351526006623, -0.4500330576920096, -0.4597107815148875, -0.4692604866545919, -0.4786742773530132, -0.4879442057320241, -0.4970622782355879, -0.506020462284061, -0.5148106931332611, -0.5234248809299851, -0.5318549179547881, -0.5400926860419907, -0.5481300641660583, -0.5559589361827094, -0.5635711987123596, -0.5709587691527862, -0.5781135938072344, -0.5850276561135525, -0.5916929849593676, -0.5981016630677825, -0.6042458354376007, -0.6101177178216665, -0.6157096052265445, -0.6210138804164608, -0.6260230224041907, -0.6307296149113978, -0.6351263547808201, -0.6392060603226538, -0.6429616795775078, -0.6463862984783917, -0.6494731488943597, -0.6522156165386639, -0.654607248724562, -0.6566417619522971, -0.6583130493111959, -0.6596151876813346, -0.6605424447197844, -0.661089285617083, -0.6612503796102648, -0.661020606239533, -0.6603950613364651, -0.6593690627325006, -0.6579381556773701, -0.656098117958075, -0.653844964710034, -0.6511749529130311, -0.6480845855656844, -0.6445706155332326, -0.6406300490645684, -0.6362601489755708, -0.6314584374969486, -0.6262226987859494, -0.6205509811024544, -0.6144415986511246, -0.6078931330924063, -0.6009044347263326, -0.5934746233541555, -0.5856030888239269, -0.5772894912671875, -0.5685337610349314, -0.5593360983419783, -0.5496969726298057, -0.5396171216587529, -0.5290975503413257, 
-0.5181395293290695, -0.5067445933661748, -0.4949145394235849, -0.4826514246279309, -0.469957564000089, -0.4568355280185554, -0.4432881400231549, -0.4293184734748445, -0.4149298490875375, -0.4001258318479581, -0.3849102279395419, -0.3692870815863267, -0.3532606718326208, -0.3368355092740127, -0.3200163327549778, -0.3028081060479604, -0.2852160145283628, -0.2672454618593546, -0.2489020667000875, -0.2301916594482799, -0.2111202785433769, -0.1916941658499465, -0.171919764364007, -0.1518037181106633, -0.1313528676752471, -0.1105742479505225, -0.0894750853807963, -0.06806279540205992, -0.04634497998270148, -0.02432942527586014, -0.002024099385357209, 0.02056284975398835, 0.04342309638191039, 0.06654813880544938, 0.08992930118132007, 0.1135577351913601, 0.1374244216155619, 0.1615201718081993, 0.1858356290835378, 0.2103612700185566, 0.2350874056810136, 0.2600041827920362, 0.2851015848332275, 0.3103694331090283, 0.3357973877757702, 0.3613749488494883, 0.387091457205131, 0.4129360955803101, 0.4388978895971636, 0.4649657088162737, 0.4911282678368701, 0.5173741274577347, 0.5436916959133814, 0.5700692302001829, 0.5964948375070233, 0.6229564767650037, 0.6494419603305343, 0.6759389558158937, 0.7024349880810022, 0.7289174413997411, 0.7553735618136643, 0.7817904596853873, 0.8081551124632975, 0.8344543676685315, 0.8606749461143814, 0.8868034453674609, 0.9128263434590498, 0.9387300028540799, 0.9645006746842021, 0.9901245032503004, 1.015587530798707, 1.040875702574195, 1.065974872151634, 1.090870807046936, 1.115549194606669, 1.139995648174401, 1.164195713530527, 1.18813487560099, 1.211798565428982, 1.235172167402352, 1.258241026728088, 1.280990457143938, 1.303405748855879, 1.325472176688844, 1.347175008436865, 1.368499513397494, 1.389430971074219, 1.409954680029364, 1.430055966868913, 1.449720195339594, 1.468932775517619, 1.487679173067512, 1.505944918548674, 1.523715616746525, 1.540976956004445, 1.557714717532153, 1.573914784665695, 1.589563152053886, 1.604645934745782, 
1.61914937715367, 1.633059861866079, 1.646363918285426, 1.65904823106524, 1.671099648322307, 1.682505189599645, 1.693252053556956, 1.703327625366089, 1.712719483790044, 1.721415407925285, 1.729403383588471, 1.736671609330242, 1.743208502060372, 1.749002702270495, 1.754043078842596, 1.758318733433683, 1.761819004429379, 1.764533470461747, 1.766451953489288, 1.767564521439938, 1.767861490420868, 1.767333426502044, 1.765971147083807, 1.763765721862138, 1.760708473408894, 1.756790977387922, 1.752005062431827, 1.746342809708056, 1.73979655220697, 1.732358873788699, 1.724022608029734, 1.714780836914468, 1.704626889421158, 1.693554340056166, 1.681557007394636, 1.668628952690158, 1.654764478620318, 1.639958128239363, 1.624204684213509, 1.607499168418637, 1.589836841984306, 1.571213205872051, 1.551624002079893, 1.531065215568803, 1.509533077010524, 1.487024066459637, 1.463534918056045, 1.439062625867122, 1.41360445098161, 1.387157929969905, 1.359720884827664, 1.331291434521629, 1.301868008258218, 1.271449360596692, 1.24003458852964, 1.207623150653984, 1.174214888555792, 1.139810050531771, 1.104409317769435, 1.068013833106546, 1.030625232488476, 0.9922456792396311, 0.952877901261965, 0.9125252312698523, 0.8711916501661876, 0.828881833659456, 0.785601202215668, 0.7413559744324302, 0.6961532239149881, 0.6500009397257805, 0.6029080904698609, 0.5548846920684043, 0.5059418792613958, 0.4560919808684225, 0.4053485988232192, 0.3537266909831918, 0.3012426576994875, 0.2479144321162487, 0.1937615741493917, 0.1388053680755301, 0.08306892364043116, 0.02657728057356776, -0.03064248362917111, -0.0885611428188304, -0.1471472083951644, -0.2063668128205946, -0.2661835907016181, -0.3265585577175626, -0.3874499877280915, -0.4488132884416094, -0.5106008760235008, -0.5727620491134915, -0.6352428627498634, -0.6979860027527585, -0.7609306611353356, -0.8240124132397566, -0.8871630972862469, -0.9503143891562629, -1.013405945717686, -1.07637029445945, -1.139115600808456, -1.201556146163991, 
-1.263598461412328, -1.325144971947503, -1.386093419457548, -1.446336850163621, -1.505763578566488, -1.564257164081952, -1.621696403883224, -1.67795534335164, -1.732903305936643, -1.786404944284953, -1.838320314595545, -1.888504976249755, -1.93681011885642, -1.983082718939349, -2.027165728577448, -2.068898298385222, -2.108116037291678, -2.144651311636851, -2.178333586155809, -2.208989809459005, -2.236444846640478, -2.260521961652191, -2.281043352069214, -2.297830738834558, -2.310706013510904, -2.319491945476126, -2.324012951376878, -2.32409592899584, -2.319571157489732, -2.310273265712625, -2.296042270048221, -2.276724682831136, -2.2521746920361, -2.222255412450786, -2.186840208017746, -2.145814084428966, -2.099075150377959, -2.046536145114471, -1.988126029101303, -1.923791633637198, -1.85349936428048, -1.77723695178184, -1.695015243008773, -1.606870023016947, -1.512863857994425, -1.413087947273711, -1.307663970975908, -1.196745918124536, -1.080521878249175, -0.9592157775990137, -0.8330890391137414, -0.7024421432672815, -0.5676160648249436, -0.428993558456533, -0.2870002640506239, -0.1421056005065979, 0.005176585226156811, 0.1542876490006691, 0.3046240971214432, 0.4555378892709313, 0.60633713652517, 0.7562872352961277, 0.9046124786924533, 1.050498187000167, 1.193093398671758, 1.331514162291723, 1.464847468369129, 1.592155857398831, 1.712482737334532, 1.824858439327937, 1.928307035207131, 2.021853933592649, 2.10453426368316, 2.175402046491588, 2.233540142592398, 2.278070953179986, 2.308167837380577, 2.323067193271148, 2.322081132929654, 2.304610663094172, 2.270159262705165, 2.21834672685, 2.148923123578761, 2.06178268593038, 1.956977436579568, 1.834730317142389, 1.695447568796727, 1.539730086005097, 1.368383441385657, 1.182426257873909, 0.9830965850630257, 0.7718559209150987, 0.5503905089064325, 0.3206095352059821, 0.08463985186731171, -0.155183138512606, -0.3963287823574636, -0.6360889750359312, -0.8716007357392715, -1.099873777128869, -1.317823220427588, 
-1.522307522150309, -1.710171578116409, -1.878294853329175, -2.023644252703639, -2.143331297915305, -2.234673010844582, -2.29525572589768, -2.323000864359895, -2.316231507216088, -2.273738402843607, -2.194843847957843, -2.079461690556606, -1.928151529846371, -1.742165038795376, -1.523482219606755, -1.274835331482666, -0.9997182146890431, -0.70237878667734, -0.3877926164776839, -0.0616157039513853, 0.269885088969239, 0.599925078577309, 0.9213188503096416, 1.226621316763894, 1.508291879540058, 1.758879940822884, 1.971229020855451, 2.138695667755207, 2.255378223249953, 2.31634936698083, 2.317885246877936, 2.257682966340633, 2.135057301406481, 1.951106830882072, 1.708839253206093, 1.413245612385637, 1.071313538249728, 0.6919704956665378, 0.2859494959862439, -0.1344282008183505, -0.5555561475395865, -0.9629309328725536, -1.341621009204848, -1.676807095498279, -1.954380266937191, -2.161578781776865, -2.287639670891325, -2.324436419415276, -2.267070043592049, -2.114377888790909, -1.869322948548586, -1.539226832138465, -1.135812065142497, -0.6750245084161627, -0.1766145407060163, 0.3365336574360721, 0.8393219662640767, 1.305588781105119, 1.709453902804619, 2.026803177325909, 2.236839881505574, 2.323613156928579, 2.277420334561974, 2.095971510823904, 1.785202981731526, 1.359632722233694, 0.8421672917969013, 0.2632961010158538, -0.3403541115171306, -0.928085411481558, -1.457742531531107, -1.888743770910752, -2.18534075057045, -2.319867181519198 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 2, + "radial_function": [ + 3.576763066794322e-07, 3.651994910029862e-07, 3.728809142182678e-07, 3.807239046428847e-07, 3.887414451201717e-07, 3.969575280119985e-07, 4.053712750063485e-07, 4.139881365436407e-07, 4.228126444511028e-07, 4.318494414796782e-07, 4.411032851833248e-07, 4.505790505774639e-07, 4.602817328898567e-07, 4.702164504017101e-07, 4.803884473587638e-07, 4.908030969564715e-07, 5.014659044009188e-07, 5.123825100473473e-07, 5.235586926181963e-07, 
5.350003725026167e-07, 5.467136151394658e-07, 5.587046344858397e-07, 5.70979796573258e-07, 5.835456231536606e-07, 5.964087954374412e-07, 6.09576157925791e-07, 6.230547223396877e-07, 6.368516716479242e-07, 6.509743641966304e-07, 6.654303379428057e-07, 6.802273147944433e-07, 6.953732050598894e-07, 7.108761120091557e-07, 7.267443365499619e-07, 7.429863820213646e-07, 7.596109591078929e-07, 7.76626990877195e-07, 7.94043617944264e-07, 8.118702037653988e-07, 8.301163400651309e-07, 8.487918523994304e-07, 8.67906805858587e-07, 8.874715109132503e-07, 9.074965294071998e-07, 9.279926807005032e-07, 9.489710479668182e-07, 9.704429846486838e-07, 9.924201210747463e-07, 1.014914371242962e-06, 1.037937939773924e-06, 1.061503329038562e-06, 1.085623346464575e-06, 1.110311112026048e-06, 1.135580065920865e-06, 1.161443976440566e-06, 1.187916948037503e-06, 1.215013429594198e-06, 1.242748222899976e-06, 1.271136491340049e-06, 1.300193768802363e-06, 1.329935968807662e-06, 1.360379393868353e-06, 1.391540745081882e-06, 1.423437131964509e-06, 1.45608608253148e-06, 1.489505553629769e-06, 1.523713941529712e-06, 1.558730092782002e-06, 1.5945733153467e-06, 1.631263390001053e-06, 1.668820582033116e-06, 1.707265653228303e-06, 1.746619874156238e-06, 1.786905036765374e-06, 1.828143467293129e-06, 1.870358039499402e-06, 1.913572188231577e-06, 1.957809923329311e-06, 2.003095843877608e-06, 2.049455152816895e-06, 2.096913671919034e-06, 2.145497857138429e-06, 2.195234814347616e-06, 2.246152315466958e-06, 2.298278814998295e-06, 2.351643466972683e-06, 2.406276142322553e-06, 2.462207446688933e-06, 2.519468738674604e-06, 2.578092148554348e-06, 2.638110597453736e-06, 2.699557817008159e-06, 2.762468369514125e-06, 2.826877668585132e-06, 2.89282200032474e-06, 2.960338545029776e-06, 3.029465399436923e-06, 3.100241599526291e-06, 3.172707143895889e-06, 3.246903017721283e-06, 3.322871217315051e-06, 3.400654775301048e-06, 3.480297786418842e-06, 3.561845433974067e-06, 3.645344016950845e-06, 3.730840977802817e-06, 
3.818384930939737e-06, 3.908025691927021e-06, 3.999814307416045e-06, 4.09380308582346e-06, 4.190045628778232e-06, 4.288596863355575e-06, 4.389513075117435e-06, 4.492851941979662e-06, 4.59867256892651e-06, 4.707035523593629e-06, 4.818002872741207e-06, 4.931638219639507e-06, 5.048006742389549e-06, 5.167175233202282e-06, 5.28921213866016e-06, 5.414187600985631e-06, 5.542173500341652e-06, 5.673243498189982e-06, 5.80747308173362e-06, 5.944939609470428e-06, 6.085722357885641e-06, 6.229902569311661e-06, 6.377563500984216e-06, 6.52879047532472e-06, 6.683670931479366e-06, 6.842294478146285e-06, 7.004752947722838e-06, 7.17114045180594e-06, 7.341553438079095e-06, 7.516090748620689e-06, 7.694853679668908e-06, 7.877946042879539e-06, 8.065474228113831e-06, 8.25754726779446e-06, 8.454276902868636e-06, 8.655777650418314e-06, 8.862166872958473e-06, 9.073564849465474e-06, 9.29009484817846e-06, 9.511883201217904e-06, 9.739059381066463e-06, 9.971756078958415e-06, 1.021010928522509e-05, 1.045425837164486e-05, 1.070434617584757e-05, 1.096051908782423e-05, 1.122292713859446e-05, 1.149172409108505e-05, 1.176706753327462e-05, 1.204911897366053e-05, 1.233804393910577e-05, 1.263401207512457e-05, 1.293719724866758e-05, 1.324777765346826e-05, 1.356593591801409e-05, 1.389185921620761e-05, 1.422573938078399e-05, 1.456777301955321e-05, 1.491816163453702e-05, 1.527711174407215e-05, 1.564483500795322e-05, 1.602154835569072e-05, 1.640747411796077e-05, 1.680284016132596e-05, 1.720788002630782e-05, 1.762283306889401e-05, 1.804794460556492e-05, 1.848346606192672e-05, 1.892965512503981e-05, 1.938677589953406e-05, 1.985509906760416e-05, 2.033490205298087e-05, 2.082646918897626e-05, 2.133009189070337e-05, 2.184606883157323e-05, 2.237470612417457e-05, 2.291631750564433e-05, 2.347122452763944e-05, 2.403975675102323e-05, 2.462225194538251e-05, 2.521905629349412e-05, 2.583052460086275e-05, 2.64570205104547e-05, 2.709891672275531e-05, 2.77565952212808e-05, 2.843044750367857e-05, 2.912087481855321e-05, 
2.982828840815856e-05, 3.055310975710003e-05, 3.129577084719429e-05, 3.20567144186376e-05, 3.283639423763704e-05, 3.36352753706633e-05, 3.4453834465487e-05, 3.529256003916449e-05, 3.615195277314356e-05, 3.70325258156626e-05, 3.793480509162221e-05, 3.885932962011116e-05, 3.980665183977433e-05, 4.077733794221356e-05, 4.177196821361782e-05, 4.279113738482326e-05, 4.383545499000868e-05, 4.490554573423704e-05, 4.600204987005828e-05, 4.712562358339428e-05, 4.827693938893166e-05, 4.945668653525387e-05, 5.066557141994924e-05, 5.190431801493748e-05, 5.317366830226268e-05, 5.447438272060711e-05, 5.580724062278566e-05, 5.717304074448743e-05, 5.857260168453684e-05, 6.000676239695351e-05, 6.147638269509628e-05, 6.298234376818406e-05, 6.45255487104925e-05, 6.610692306353299e-05, 6.772741537152747e-05, 6.938799775050006e-05, 7.108966647131395e-05, 7.283344255698963e-05, 7.462037239464881e-05, 7.645152836243588e-05, 7.832800947177758e-05, 8.02509420253495e-05, 8.22214802911269e-05, 8.424080719290625e-05, 8.631013501769248e-05, 8.843070614035668e-05, 9.060379376597786e-05, 9.283070269029243e-05, 9.511277007868469e-05, 9.745136626416155e-05, 9.984789556476518e-05, 0.0001023037971208878, 0.0001048205457529629, 0.00010739965284002, 0.0001100426672195975, 0.0001127511761095246, 0.0001155268060520905, 0.0001183712238811343, 0.0001212861377125977, 0.0001242732979590991, 0.0001273344983690982, 0.0001304715770912313, 0.0001336864177644159, 0.0001369809506343309, 0.0001403571536968976, 0.0001438170538693962, 0.0001473627281898719, 0.0001509963050454937, 0.00015471996543055, 0.0001585359442347749, 0.0001624465315627197, 0.0001664540740848953, 0.0001705609764214325, 0.0001747697025590182, 0.000179082777301889, 0.0001835027877576744, 0.0001880323848589041, 0.0001926742849210114, 0.0001974312712376796, 0.0002023061957144021, 0.000207301980541142, 0.0002124216199049987, 0.0002176681817438085, 0.0002230448095416254, 0.0002285547241670513, 0.0002342012257554024, 0.0002399876956357251, 
0.0002459175983036906, 0.0002519944834414253, 0.0002582219879853535, 0.0002646038382431528, 0.0002711438520609476, 0.0002778459410418881, 0.0002847141128172884, 0.0002917524733715216, 0.0002989652294218956, 0.000306356690854759, 0.0003139312732191128, 0.0003216935002790309, 0.00032964800662622, 0.0003377995403540756, 0.0003461529657946237, 0.0003547132663197588, 0.0003634855472082279, 0.0003724750385798319, 0.000381687098398351, 0.0003911272155447291, 0.0004008010129620868, 0.0004107142508741597, 0.0004208728300787962, 0.0004312827953181794, 0.000441950338727472, 0.0004528818033636164, 0.0004640836868160591, 0.0004755626449012011, 0.0004873254954424143, 0.0004993792221374975, 0.000511730978515486, 0.0005243880919847641, 0.0005373580679744655, 0.0005506485941711933, 0.0005642675448531201, 0.0005782229853235769, 0.0005925231764462743, 0.0006071765792843444, 0.0006221918598454308, 0.0006375778939350962, 0.0006533437721208611, 0.0006694988048092296, 0.0006860525274381009, 0.0007030147057870112, 0.0007203953414076931, 0.0007382046771774891, 0.000756453202978196, 0.0007751516615029675, 0.0007943110541939487, 0.0008139426473133611, 0.000834057978150808, 0.0008546688613696158, 0.0008757873954950773, 0.0008974259695475115, 0.0009195972698231043, 0.0009423142868255439, 0.0009655903223515186, 0.0009894389967331912, 0.001013874256240817, 0.001038910380648726, 0.001064561990967944, 0.001090844057348764, 0.001117771907156666, 0.001145361233224986, 0.001173628102287848, 0.001202588963596866, 0.001232260657725223, 0.001262660425562769, 0.001293805917505823, 0.001325715202845444, 0.001358406779357962, 0.001391899583101642, 0.001426212998423375, 0.001461366868179379, 0.001497381504173916, 0.001534277697820107, 0.001572076731026959, 0.001610800387316799, 0.001650470963177324, 0.001691111279652569, 0.001732744694177113, 0.001775395112657917, 0.001819087001808219, 0.001863845401737981, 0.001909695938805408, 0.001956664838734131, 0.002004778940000675, 0.002054065707496886, 
0.002104553246472035, 0.002156270316759357, 0.002209246347291833, 0.002263511450912052, 0.00231909643948104, 0.002376032839290975, 0.002434352906786737, 0.002494089644601286, 0.002555276817909894, 0.002617948971108264, 0.002682141444819625, 0.002747890393235901, 0.002815232801798071, 0.002884206505220868, 0.002954850205866966, 0.003027203492475836, 0.003101306859252438, 0.003177201725320946, 0.003254930454548689, 0.003334536375745492, 0.003416063803243613, 0.003499558057863418, 0.003585065488269994, 0.003672633492725798, 0.003762310541244481, 0.003854146198150969, 0.003948191145052843, 0.004044497204228043, 0.004143117362433841, 0.00424410579514203, 0.004347517891205138, 0.004453410277958487, 0.004561840846762812, 0.004672868778992051, 0.004786554572470877, 0.004902960068366421, 0.005022148478538505, 0.005144184413352672, 0.005269133909960067, 0.005397064461048187, 0.005528045044066325, 0.005662146150929403, 0.005799439818203702, 0.00593999965777786, 0.006083900888022298, 0.006231220365440028, 0.006382036616811638, 0.006536429871836961, 0.006694482096275742, 0.006856277025589378, 0.007021900199085487, 0.007191438994566857, 0.007364982663485978, 0.007542622366606072, 0.007724451210169212, 0.007910564282571787, 0.008101058691547185, 0.008296033601855218, 0.008495590273477401, 0.00869983210031678, 0.008908864649400597, 0.009122795700583572, 0.009341735286749185, 0.009565795734505736, 0.00979509170537358, 0.01002974023745923, 0.01026986078761165, 0.0105155752740552, 0.01076700811949347, 0.01102428629467709, 0.01128753936242851, 0.01155689952211547, 0.01183250165456475, 0.01211448336740638, 0.01240298504083844, 0.0126981498738011, 0.01300012393054813, 0.01330905618760315, 0.01362509858108689, 0.0139484060544008, 0.01427913660625159, 0.01461745133899998, 0.01496351450731609, 0.01531749356712274, 0.01567955922480692, 0.01604988548667835, 0.01642864970865313, 0.01681603264613887, 0.0172122185040969, 0.01761739498725543, 0.0180317533504464, 0.0184554884490373, 
0.01888879878942784, 0.01933188657957983, 0.01978495777954709, 0.02024822215197073, 0.02072189331250346, 0.02120618878012491, 0.02170133002730817, 0.02220754252999636, 0.02272505581734561, 0.02325410352118967, 0.0237949234251788, 0.02434775751354433, 0.02491285201943753, 0.02549045747279011, 0.02608082874764099, 0.02668422510887208, 0.02730091025829373, 0.02793115238001784, 0.02857522418505481, 0.02923340295506772, 0.02990597058521502, 0.03059321362601027, 0.03129542332412526, 0.03201289566206, 0.03274593139660053, 0.03349483609598287, 0.03425992017567883, 0.03504149893271605, 0.03583989257844277, 0.03665542626964398, 0.03748843013791328, 0.03833923931718171, 0.03920819396930145, 0.0400956393075799, 0.04100192561815563, 0.04192740827910546, 0.04287244777716811, 0.04383740972196678, 0.04482266485760999, 0.04582858907154626, 0.04685556340054544, 0.04790397403367534, 0.04897421231213991, 0.05006667472584093, 0.05118176290652195, 0.05231988361735032, 0.05348144873878867, 0.05466687525060458, 0.05587658520986312, 0.05711100572474388, 0.05837056892402007, 0.05965571192203414, 0.06096687677900109, 0.06230451045646624, 0.06366906476774205, 0.06506099632314398, 0.06648076646984272, 0.06792884122614659, 0.06940569121002427, 0.07091179156167524, 0.07244762185995186, 0.07401366603243387, 0.07561041225895314, 0.07723835286836343, 0.0788979842283474, 0.08058980662804965, 0.08231432415332274, 0.08407204455437026, 0.0858634791055683, 0.08768914245724539, 0.08954955247919798, 0.09144523009571708, 0.09337669911190036, 0.09534448603102173, 0.0973491198627301, 0.09939113192184701, 0.1014710556175336, 0.1035894262325947, 0.1057467806926901, 0.1079436573252216, 0.1101805956076647, 0.1124581359051167, 0.1147768191968315, 0.1171371867915141, 0.1195397800311512, 0.1219851399831547, 0.1244738071205974, 0.1270063209903271, 0.129583219868745, 0.132205040405041, 0.1348723172516838, 0.1375855826819676, 0.1403453661944252, 0.1431521941039222, 0.146006589119258, 0.1489090699071021, 
0.1518601506421093, 0.1548603405430622, 0.1579101433949026, 0.1610100570565248, 0.1641605729542134, 0.167362175560626, 0.1706153418592293, 0.1739205407941179, 0.1772782327051569, 0.1806888687484079, 0.1841528903018163, 0.1876707283561559, 0.1912428028912476, 0.1948695222374895, 0.198551282422759, 0.2022884665047693, 0.2060814438889896, 0.2099305696322605, 0.213836183732268, 0.2177986104030611, 0.2218181573368343, 0.2258951149522197, 0.2300297556293704, 0.2342223329321463, 0.238473080817748, 0.2427822128341791, 0.2471499213059545, 0.2515763765085049, 0.2560617258317704, 0.2606060929335118, 0.2652095768829063, 0.2698722512950382, 0.2745941634569314, 0.2793753334458142, 0.2842157532403473, 0.2891153858255873, 0.2940741642925011, 0.2990919909328837, 0.3041687363305812, 0.3093042384499513, 0.3144983017225427, 0.3197506961330094, 0.3250611563053143, 0.3304293805903145, 0.3358550301558553, 0.3413377280805336, 0.3468770584523216, 0.3524725654732732, 0.3581237525715572, 0.3638300815220912, 0.3695909715770657, 0.3754057986076643, 0.3812738942583059, 0.3871945451147368, 0.3931669918873111, 0.3991904286108001, 0.4052640018620664, 0.4113868099969357, 0.4175579024075883, 0.4237762788017777, 0.4300408885051702, 0.4363506297880751, 0.4427043492178173, 0.4491008410379771, 0.4555388465756974, 0.4620170536782351, 0.4685340961799069, 0.4750885534005575, 0.4816789496766654, 0.4883037539261793, 0.4949613792481803, 0.5016501825584625, 0.5083684642621407, 0.5151144679644152, 0.521886380220668, 0.5286823303271168, 0.5355003901533244, 0.5423385740179598, 0.5491948386093074, 0.5560670829521654, 0.5629531484229139, 0.5698508188147152, 0.5767578204549928, 0.5836718223775453, 0.5905904365518729, 0.5975112181725208, 0.6044316660114865, 0.6113492228369686, 0.6182612759019721, 0.6251651575064981, 0.6320581456372536, 0.6389374646889858, 0.6458002862716944, 0.6526437301080699, 0.659464865025574, 0.666260710047574, 0.6730282355879026, 0.6797643647531014, 0.6864659747564427, 0.693129898447589, 
0.6997529259614657, 0.7063318064895678, 0.7128632501765244, 0.7193439301442803, 0.7257704846457681, 0.7321395193493945, 0.7384476097550972, 0.7446913037421372, 0.7508671242481706, 0.756971572078522, 0.76300112884395, 0.7689522600245668, 0.7748214181569473, 0.7806050461408575, 0.7862995806614306, 0.7919014557220475, 0.7974071062826212, 0.8028129719974566, 0.8081155010463564, 0.8133111540521659, 0.8183964080775071, 0.823367760693036, 0.8282217341091719, 0.8329548793628954, 0.8375637805508865, 0.8420450590999876, 0.8463953780657109, 0.8506114464492893, 0.8546900235235714, 0.8586279231578949, 0.8624220181319546, 0.8660692444285705, 0.8695666054952087, 0.8729111764640751, 0.8761001083206034, 0.8791306320102026, 0.8820000624732001, 0.8847058025980229, 0.8872453470828048, 0.8896162861957813, 0.8918163094250491, 0.893843209008509, 0.8956948833350888, 0.8973693402086625, 0.8988646999664159, 0.9001791984437916, 0.9013111897785482, 0.9022591490469061, 0.9030216747252091, 0.9035974909710243, 0.9039854497181098, 0.904184532580217, 0.9041938525592449, 0.9040126555538359, 0.9036403216650917, 0.9030763662966831, 0.902320441047244, 0.9013723343935466, 0.9002319721635922, 0.8988994177993626, 0.8973748724096106, 0.8956586746136831, 0.8937513001779898, 0.8916533614473281, 0.889365606573877, 0.8868889185472432, 0.8842243140295071, 0.88137294199976, 0.8783360822131372, 0.8751151434798495, 0.8717116617701819, 0.8681272981518654, 0.8643638365666367, 0.8604231814531728, 0.8563073552239282, 0.8520184956037128, 0.8475588528381038, 0.8429307867800315, 0.8381367638630579, 0.8331793539700262, 0.8280612272058757, 0.8227851505834857, 0.8173539846314507, 0.8117706799326898, 0.806038273602747, 0.8001598857165639, 0.7941387156923932, 0.7879780386413678, 0.7816812016910615, 0.7752516202911579, 0.7686927745091029, 0.7620082053235233, 0.7552015109214181, 0.748276341295493, 0.741236389081663, 0.7340853874365862, 0.726827115682161, 0.7194653906838664, 0.7120040649623319, 0.7044470229888555, 
0.6967981778143373, 0.6890614677195902, 0.681240852940822, 0.6733403124469809, 0.6653638407841957, 0.6573154449868065, 0.64919914155654, 0.641018953510986, 0.6327789075021016, 0.6244830310050865, 0.6161353495775791, 0.6077398841887728, 0.5993006486176935, 0.5908216469195626, 0.582306870958867, 0.5737602980074739, 0.5651858884058808, 0.5565875832854572, 0.5479693023493346, 0.5393349417094254, 0.5306883717769116, 0.5220334352033974, 0.5133739448699016, 0.5047136819207013, 0.4960563938390591, 0.4874057925618837, 0.4787655526303648, 0.4701393093736753, 0.4615306571229124, 0.4529431474525533, 0.4443802874468246, 0.4358455379885373, 0.4273423120681097, 0.4188739731106954, 0.4104438333195456, 0.4020551520339687, 0.3937111341004932, 0.3854149282561085, 0.3771696255227293, 0.3689782576123173, 0.3608437953423921, 0.3527691470619621, 0.3447571570882195, 0.3368106041546547, 0.328932199871555, 0.3211245872001697, 0.3133903389421338, 0.3057319562460429, 0.2981518671333769, 0.2906524250462542, 0.2832359074197814, 0.2759045142820293, 0.2686603668849155, 0.2615055063695163, 0.2544418924695422, 0.2474714022569169, 0.2405958289335735, 0.2338168806737427, 0.2271361795211351, 0.2205552603455334, 0.2140755698633903, 0.2076984657270821, 0.2014252156875018, 0.1952569968346701, 0.1891948949210195, 0.1832399037719458, 0.1773929247881395, 0.1716547665440901, 0.1660261444870147, 0.1605076807402894, 0.1550999040152579, 0.1498032496350661, 0.1446180596739124, 0.1395445832148214, 0.1345829767287432, 0.1297333045774466, 0.1249955396423217, 0.1203695640808332, 0.1158551702119701, 0.1114520615316269, 0.1071598538584237, 0.1029780766100314, 0.0989061742096165, 0.09494350762155594, 0.09108935601510865, 0.0873429185542518, 0.08370331631141872, 0.08016959430239903, 0.07674072363919034, 0.07341560379712496, 0.07019306499213758, 0.06707187066359176, 0.06405072005765075, 0.06112825090575941, 0.05830304219240388, 0.05557361700593776, 0.0529384454659059, 0.05039594771996667, 0.04794449700320882, 
0.04558242275238423, 0.04330801376733296, 0.04111952141166525, 0.03901516284458646, 0.03699312427560749, 0.03505156423377555, 0.0331886168429896, 0.03140239509493108, 0.0296909941111449, 0.02805249438584778, 0.02648496500112089, 0.02498646680626109, 0.02355505555321904, 0.022188784980243, 0.02088570983607236, 0.01964388883728403, 0.0184613875516867, 0.01733628120098017, 0.01626665737624836, 0.01525061866023289, 0.01428628515073724, 0.01337179687993666, 0.01250531612481499, 0.01168502960441176, 0.01090915056004121, 0.01017592071513392, 0.00948361211185101, 0.008830528822125754, 0.008215008531296497, 0.007635423993003847, 0.00709018435453272, 0.006577736352281991, 0.006096565377539391, 0.005645196413223791, 0.005222194842728818, 0.004826167132458407, 0.004455761390084077, 0.004109667800973463, 0.003786618945637768, 0.003485390001420781, 0.003204798832002091, 0.002943705968610941, 0.002701014487143394, 0.002475669785643223, 0.002266659266845384, 0.002073011930689369, 0.001893797881887918, 0.001728127757784104, 0.00157515208184684, 0.001434060548241432, 0.001304081242968323, 0.001184479807090149, 0.001074558547565256, 0.0009736555011757919, 0.0008811434569812749, 0.000796428942644411, 0.0007189511798731625, 0.0006481810140630785, 0.0005836198231552807, 0.0005247984103881948, 0.0004712758855765303, 0.0004226385392997236, 0.0003784987140953755, 0.0003384936765445301, 0.000302284493841929, 0.0002695549180185805, 0.0002400102898341555, 0.0002133763541270613, 0.0001893988146617844, 0.0001678399693412417, 0.0001484827956101911, 0.0001311267738848909, 0.000115584518459277, 0.0001016824522711628, 8.926052781164656e-05, 7.817128375805755e-05, 6.827892540001164e-05, 5.945842671328974e-05, 5.159465142728564e-05, 4.458148883207336e-05, 3.832099603659937e-05, 3.27226211443472e-05, 2.77018970004653e-05, 2.319661794721779e-05, 1.905319716645324e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2P", + "angular_momentum": 1 + }, + { + "index": 3, + "radial_function": [ + 2.445029147465395e-07, 2.496456666172498e-07, 2.548965885555077e-07, 2.602579557563459e-07, 2.657386431489964e-07, 2.71355051656266e-07, 2.771065808646659e-07, 2.829969566303146e-07, 2.890292768546963e-07, 2.952067152654087e-07, 3.015325240680185e-07, 3.080100357633454e-07, 3.146426650279286e-07, 3.214339106561735e-07, 3.283873575503372e-07, 3.355066787611633e-07, 3.427956375802891e-07, 3.502580896857052e-07, 3.578979853415702e-07, 3.657193716537201e-07, 3.737263948822444e-07, 3.819233028125341e-07, 3.90314447186248e-07, 3.98904286193674e-07, 4.07697387029005e-07, 4.166984285100837e-07, 4.259122037642151e-07, 4.353436229816785e-07, 4.449977162386217e-07, 4.548796363910537e-07, 4.649946620417034e-07, 4.753482005815494e-07, 4.85945791307881e-07, 4.967931086207854e-07, 5.078959653000169e-07, 5.192603158642433e-07, 5.308922600147217e-07, 5.427980461655036e-07, 5.549840750623248e-07, 5.674569034923899e-07, 5.802232480873141e-07, 5.932899892215474e-07, 6.066641750086591e-07, 6.203530253979258e-07, 6.343639363737229e-07, 6.48704484260287e-07, 6.633824301344777e-07, 6.784057243492365e-07, 6.937825111705052e-07, 7.09521133530439e-07, 7.256301378998199e-07, 7.421182792826462e-07, 7.589945263359539e-07, 
7.762680666179976e-07, 7.939483119680018e-07, 8.120449040207697e-07, 8.305677198595222e-07, 8.495268778104248e-07, 8.689327433823428e-07, 8.887959353554615e-07, 9.091273320224907e-07, 9.299380775862723e-07, 9.512395887177058e-07, 9.73043561277998e-07, 9.953619772093538e-07, 1.018207111598316e-06, 1.041591539916085e-06, 1.065528145440233e-06, 1.090030126862367e-06, 1.115111006086377e-06, 1.140784636222064e-06, 1.167065209779005e-06, 1.193967267065697e-06, 1.221505704799099e-06, 1.249695784929843e-06, 1.278553143688515e-06, 1.308093800858532e-06, 1.338334169281294e-06, 1.369291064599417e-06, 1.40098171524401e-06, 1.433423772672103e-06, 1.466635321860483e-06, 1.500634892062361e-06, 1.535441467833437e-06, 1.571074500334119e-06, 1.60755391891479e-06, 1.644900142991218e-06, 1.683134094217358e-06, 1.722277208963e-06, 1.762351451103877e-06, 1.803379325132052e-06, 1.845383889594607e-06, 1.888388770868828e-06, 1.932418177282321e-06, 1.977496913586673e-06, 2.023650395793504e-06, 2.07090466638198e-06, 2.119286409887061e-06, 2.168822968878013e-06, 2.219542360336952e-06, 2.271473292447393e-06, 2.324645181803083e-06, 2.379088171047605e-06, 2.434833146955532e-06, 2.491911758966154e-06, 2.550356438181093e-06, 2.610200416837411e-06, 2.671477748268069e-06, 2.734223327361921e-06, 2.798472911535734e-06, 2.864263142231011e-06, 2.931631566948718e-06, 3.000616661835372e-06, 3.071257854834231e-06, 3.143595549415716e-06, 3.217671148901515e-06, 3.293527081397192e-06, 3.371206825348488e-06, 3.450754935736882e-06, 3.532217070930364e-06, 3.615640020205761e-06, 3.701071731959386e-06, 3.788561342623154e-06, 3.878159206303799e-06, 3.969916925163177e-06, 4.063887380558184e-06, 4.160124764959188e-06, 4.2586846146664e-06, 4.359623843344081e-06, 4.463000776392943e-06, 4.568875186181656e-06, 4.677308328158845e-06, 4.788362977867526e-06, 4.902103468884447e-06, 5.018595731707375e-06, 5.137907333613928e-06, 5.260107519516141e-06, 5.385267253835555e-06, 5.513459263424225e-06, 5.644758081557666e-06, 
5.779240093026413e-06, 5.916983580353543e-06, 6.058068771166116e-06, 6.202577886749276e-06, 6.350595191812394e-06, 6.502207045497378e-06, 6.657501953660032e-06, 6.816570622456099e-06, 6.979506013264397e-06, 7.146403398980256e-06, 7.317360421713286e-06, 7.492477151924369e-06, 7.671856149037576e-06, 7.855602523563613e-06, 8.04382400077234e-06, 8.236630985952744e-06, 8.4341366312998e-06, 8.63645690446849e-06, 8.843710658836408e-06, 9.056019705517203e-06, 9.273508887168335e-06, 9.496306153637542e-06, 9.724542639493602e-06, 9.95835274348802e-06, 1.019787420999549e-05, 1.044324821248208e-05, 1.069461943905131e-05, 1.095213618011958e-05, 1.121595041827357e-05, 1.148621792036359e-05, 1.176309833188815e-05, 1.204675527372638e-05, 1.233735644127627e-05, 1.26350737060583e-05, 1.294008321984511e-05, 1.325256552137979e-05, 1.357270564574657e-05, 1.390069323645932e-05, 1.423672266033509e-05, 1.458099312522104e-05, 1.493370880064545e-05, 1.529507894146463e-05, 1.566531801457963e-05, 1.604464582879834e-05, 1.643328766792047e-05, 1.68314744271246e-05, 1.723944275273869e-05, 1.765743518547723e-05, 1.808570030723019e-05, 1.852449289149117e-05, 1.897407405751413e-05, 1.943471142829025e-05, 1.990667929243878e-05, 2.039025877010788e-05, 2.088573798298392e-05, 2.139341222850989e-05, 2.191358415841619e-05, 2.244656396166951e-05, 2.299266955194796e-05, 2.355222675975328e-05, 2.412556952927377e-05, 2.471304012011399e-05, 2.531498931401041e-05, 2.593177662665481e-05, 2.656377052475033e-05, 2.721134864842791e-05, 2.7874898039154e-05, 2.855481537326369e-05, 2.925150720125622e-05, 2.996539019299362e-05, 3.069689138894628e-05, 3.144644845763263e-05, 3.221450995940387e-05, 3.300153561672822e-05, 3.380799659113256e-05, 3.46343757669635e-05, 3.548116804213359e-05, 3.63488806260222e-05, 3.723803334470483e-05, 3.814915895368867e-05, 3.908280345833634e-05, 4.003952644216419e-05, 4.101990140320594e-05, 4.202451609863677e-05, 4.305397289785791e-05, 4.410888914424614e-05, 4.518989752577763e-05, 
4.629764645474064e-05, 4.743280045675611e-05, 4.859604056933107e-05, 4.978806475017434e-05, 5.100958829550996e-05, 5.226134426862891e-05, 5.354408393892562e-05, 5.485857723167122e-05, 5.620561318878167e-05, 5.758600044084472e-05, 5.900056769067584e-05, 6.045016420867982e-05, 6.193566034030049e-05, 6.345794802584866e-05, 6.501794133300372e-05, 6.661657700229273e-05, 6.82548150058564e-05, 6.993363911981955e-05, 7.165405751059064e-05, 7.341710333542212e-05, 7.522383535757152e-05, 7.707533857641073e-05, 7.897272487283895e-05, 8.091713367036277e-05, 8.290973261221551e-05, 8.495171825489638e-05, 8.70443167785181e-05, 8.918878471436165e-05, 9.138640969004472e-05, 9.363851119272034e-05, 9.594644135073163e-05, 9.83115857341577e-05, 0.0001007353641746964, 0.0001032192316053389, 0.0001057646789203017, 0.0001083732338556927, 0.000111046461891397, 0.0001137859671746811, 0.000116593393466024, 0.0001194704251076951, 0.0001224187880156111, 0.0001254402506950151, 0.0001285366252805344, 0.0001317097686011837, 0.0001349615832708957, 0.0001382940188051718, 0.0001417090727644596, 0.0001452087919248763, 0.0001487952734769134, 0.0001524706662527676, 0.000156237171982961, 0.0001600970465829264, 0.0001640526014702475, 0.00016810620491326, 0.0001722602834117347, 0.000176517323110379, 0.0001808798712459088, 0.0001853505376284594, 0.0001899319961581203, 0.000194626986377396, 0.0001994383150604112, 0.0002043688578396967, 0.0002094215608714101, 0.0002145994425398633, 0.0002199055952022474, 0.0002253431869744631, 0.000230915463558987, 0.0002366257501157187, 0.0002424774531767779, 0.0002484740626062383, 0.0002546191536058057, 0.0002609163887674706, 0.0002673695201741819, 0.0002739823915496168, 0.0002807589404581368, 0.0002877032005560478, 0.0002948193038952993, 0.0003021114832807871, 0.0003095840746824412, 0.0003172415197033073, 0.0003250883681048554, 0.0003331292803907708, 0.0003413690304505088, 0.0003498125082639207, 0.0003584647226682832, 0.0003673308041890887, 0.0003764160079359818, 
0.0003857257165652539, 0.0003952654433103339, 0.0004050408350817404, 0.0004150576756379924, 0.0004253218888289968, 0.0004358395419134677, 0.0004466168489519547, 0.0004576601742770919, 0.0004689760360427062, 0.0004805711098534539, 0.0004924522324766881, 0.0005046264056382841, 0.0005171007999041909, 0.0005298827586494967, 0.0005429798021168394, 0.0005563996315660176, 0.0005701501335166954, 0.0005842393840861226, 0.0005986756534238305, 0.0006134674102452916, 0.0006286233264665717, 0.0006441522819420285, 0.0006600633693071566, 0.0006763658989287032, 0.0006930694039642181, 0.0007101836455332391, 0.0007277186180023464, 0.0007456845543863533, 0.0007640919318679441, 0.0007829514774380957, 0.0008022741736596654, 0.0008220712645565574, 0.000842354261630918, 0.0008631349500108503, 0.0008844253947311683, 0.0009062379471497554, 0.0009285852515021208, 0.0009514802515967905, 0.0009749361976542036, 0.0009989666532918182, 0.001023585502658174, 0.001048806957718688, 0.001074645565696005, 0.001101116216667744, 0.001128234151324538, 0.001156014968891287, 0.001184474635214575, 0.001213629491019254, 0.001243496260337208, 0.001274092059111372, 0.001305434403978075, 0.001337541221230863, 0.001370430855968927, 0.001404122081433344, 0.001438634108534338, 0.001473986595572814, 0.001510199658159433, 0.001547293879334537, 0.001585290319892246, 0.001624210528912083, 0.001664076554501507, 0.001704910954752746, 0.00174673680891736, 0.001789577728801964, 0.001833457870388576, 0.001878401945683057, 0.001924435234795145, 0.001971583598253561, 0.00201987348955972, 0.002069331967983551, 0.002119986711604956, 0.002171866030604444, 0.002224998880806447, 0.002279414877478872, 0.002335144309392392, 0.002392218153142998, 0.002450668087741322, 0.002510526509472207, 0.002571826547028013, 0.002634602076919099, 0.002698887739164915, 0.002764718953269093, 0.002832131934481926, 0.002901163710353528, 0.002971852137580991, 0.003044235919152772, 0.003118354621793486, 0.003194248693712263, 0.003271959482657705, 
0.003351529254282487, 0.003433001210820499, 0.003516419510079393, 0.003601829284751308, 0.003689276662044431, 0.003778808783637975, 0.003870473825963051, 0.003964321020811773, 0.004060400676276843, 0.004158764198023712, 0.004259464110897288, 0.00436255408086502, 0.004468088937298007, 0.004576124695591655, 0.004686718580127201, 0.004799929047575249, 0.00491581581054228, 0.005034439861560875, 0.005155863497424191, 0.005280150343865003, 0.005407365380579362, 0.005537574966594713, 0.005670846865982015, 0.005807250273911151, 0.005946855843048613, 0.006089735710296166, 0.006235963523868858, 0.006385614470710406, 0.006538765304243688, 0.006695494372453635, 0.006855881646299518, 0.007020008748453163, 0.007187958982359275, 0.00735981736161356, 0.007535670639653965, 0.007715607339759815, 0.007899717785353196, 0.008088094130596399, 0.008280830391278738, 0.008478022475985515, 0.00867976821754129, 0.008886167404719118, 0.009097321814206721, 0.009313335242819978, 0.009534313539953475, 0.009760364640257105, 0.009991598596527144, 0.01022812761279932, 0.01047006607763084, 0.01071753059755739, 0.01097064003071038, 0.01122951552057904, 0.01149428052990072, 0.0117650608746623, 0.01204198475819441, 0.01232518280533941, 0.01261478809667278, 0.01291093620275712, 0.01321376521840632, 0.01352341579693679, 0.01384003118438142, 0.0141637572536408, 0.01449474253854511, 0.01483313826779875, 0.01517909839877882, 0.01553277965115707, 0.01589434154031363, 0.01626394641050969, 0.01664175946778479, 0.01702794881254297, 0.01742268547179057, 0.01782614343098718, 0.01823849966546945, 0.01865993417140611, 0.01909062999624093, 0.01953077326857859, 0.01998055322746704, 0.0204401622510279, 0.02090979588438498, 0.02138965286683902, 0.02187993515823519, 0.02238084796446764, 0.02289259976206395, 0.02341540232179013, 0.02394947073121481, 0.0244950234161695, 0.02505228216103954, 0.02562147212781851, 0.0262028218738563, 0.02679656336822966, 0.02740293200666107, 0.0280221666249101, 0.02865450951055894, 
0.02930020641311156, 0.02995950655232377, 0.03063266262467882, 0.03131993080792117, 0.03202157076355843, 0.03273784563723908, 0.03346902205691134, 0.03421537012866572, 0.03497716343016177, 0.03575467900153661, 0.03654819733369059, 0.03735800235384286, 0.03818438140824688, 0.03902762524195377, 0.03988802797550832, 0.04076588707846045, 0.04166150333957194, 0.04257518083359602, 0.0435072268845046, 0.04445795202503557, 0.04542766995243003, 0.04641669748022705, 0.04742535448598038, 0.04845396385476017, 0.04950285141829937, 0.05057234588964289, 0.05166277879315457, 0.05277448438973557, 0.05390779959710475, 0.05506306390499029, 0.05624061928507908, 0.05744081009556874, 0.05866398298016542, 0.05991048676136836, 0.06118067232788073, 0.06247489251598472, 0.0637935019847174, 0.06513685708468231, 0.06650531572033122, 0.06789923720554858, 0.06931898211237118, 0.0707649121126745, 0.07223738981265655, 0.07373677857995042, 0.0752634423631958, 0.0768177455039008, 0.0784000525404251, 0.08001072800391679, 0.08165013620603552, 0.08331864101829665, 0.08501660564287174, 0.08674439237468351, 0.08850236235463502, 0.09029087531381544, 0.09211028930852806, 0.09396096044598945, 0.09584324260055213, 0.09775748712030766, 0.09970404252393164, 0.1016832541876375, 0.1036954640221104, 0.1057410101393011, 0.1078202265089634, 0.1099334426048283, 0.1120809830403146, 0.1142631671936851, 0.1164803088225651, 0.118732715667751, 0.1210206890462462, 0.1233445234334736, 0.1257045060346241, 0.1281009163451142, 0.1305340257001384, 0.1330040968133166, 0.13551138330445, 0.1380561292164165, 0.14063856852125, 0.1432589246154679, 0.1459174098047274, 0.1486142247779091, 0.1513495580707474, 0.1541235855191456, 0.1569364697023374, 0.1597883593760735, 0.1626793888960398, 0.1656096776317326, 0.168579329371043, 0.1715884317158247, 0.1746370554687484, 0.1777252540117662, 0.1808530626765421, 0.184020498107226, 0.1872275576159796, 0.1904742185316882, 0.1937604375423215, 0.1970861500314319, 0.200451269409312, 
0.2038556864393546, 0.2072992685601918, 0.2107818592042137, 0.2143032771130973, 0.2178633156509998, 0.2214617421160991, 0.2250982970511878, 0.2287726935540508, 0.2324846165883812, 0.2362337222960079, 0.2400196373112307, 0.2438419580780756, 0.2477002501712985, 0.2515940476219825, 0.2555228522485826, 0.2594861329942839, 0.2634833252715453, 0.2675138303147059, 0.2715770145415342, 0.2756722089245971, 0.2797987083733274, 0.2839557711276567, 0.2881426181640793, 0.2923584326149965, 0.2966023592021839, 0.3008735036852061, 0.3051709323255932, 0.3094936713675729, 0.3138407065361441, 0.318210982553255, 0.3226034026728441, 0.3270168282354882, 0.3314500782433963, 0.335901928956488, 0.3403711135102988, 0.3448563215564676, 0.349356198926582, 0.3538693473201873, 0.3583943240178072, 0.3629296416198772, 0.3674737678125576, 0.3720251251614745, 0.3765820909345273, 0.3811429969550139, 0.3857061294864405, 0.3902697291505189, 0.3948319908799998, 0.3993910639081399, 0.4039450517967662, 0.4084920125050586, 0.4130299585013412, 0.4175568569203268, 0.4220706297684125, 0.4265691541797622, 0.4310502627260276, 0.4355117437826613, 0.4399513419548385, 0.4443667585660467, 0.4487556522124018, 0.453115639385714, 0.457444295168252, 0.461739154002033, 0.4659977105353099, 0.4702174205487207, 0.4743957019633288, 0.4785299359324989, 0.4826174680192426, 0.4866556094603216, 0.4906416385180268, 0.494572801920157, 0.4984463163883127, 0.5022593702541973, 0.5060091251631861, 0.5096927178639964, 0.51330726208285, 0.5168498504801026, 0.520317556686886, 0.5237074374189079, 0.5270165346641531, 0.5302418779408539, 0.5333804866217352, 0.5364293723201928, 0.5393855413337425, 0.5422459971397711, 0.5450077429383363, 0.5476677842365043, 0.5502231314684668, 0.5526708026454666, 0.5550078260293608, 0.5572312428234762, 0.5593381098742609, 0.5613255023771061, 0.5631905165796068, 0.5649302724754451, 0.5665419164820223, 0.5680226240949297, 0.5693696025123359, 0.570580093222383, 0.5716513745467178, 0.5725807641333506, 
0.5733656213921179, 0.5740033498661399, 0.574491399532798, 0.5748272690279227, 0.5750085077870674, 0.5750327180979539, 0.5748975570584155, 0.5746007384344162, 0.5741400344130116, 0.573513277245421, 0.5727183607757036, 0.5717532418508801, 0.5706159416087059, 0.5693045466396852, 0.5678172100203169, 0.5661521522149766, 0.56430766184427, 0.5622820963181333, 0.5600738823324083, 0.557681516228077, 0.5551035642128107, 0.5523386624449568, 0.5493855169805597, 0.5462429035844877, 0.5429096674072058, 0.5393847225292098, 0.5356670513755915, 0.5317557040036653, 0.5276497972670293, 0.5233485138598646, 0.5188511012457022, 0.5141568704752807, 0.5092651948985114, 0.5041755087759303, 0.4988873057953611, 0.4934001374998371, 0.4877136116331299, 0.4818273904095021, 0.4757411887145533, 0.4694547722442429, 0.4629679555893689, 0.4562806002729405, 0.4493926127480144, 0.4423039423636683, 0.4350145793068544, 0.4275245525279165, 0.4198339276575628, 0.411942804923068, 0.403851317071423, 0.3955596273070719, 0.3870679272517644, 0.3783764349339099, 0.3694853928146573, 0.3603950658577257, 0.351105739649952, 0.3416177185780232, 0.3319313233076486, 0.322046886689936, 0.3119647529615452, 0.3016852804309834, 0.2912088377443604, 0.2805358033001303, 0.2696665639019528, 0.2586015136381926, 0.2473410528517447, 0.235885587213953, 0.2242355269049856, 0.212391285902477, 0.2003532813798321, 0.1881219332151132, 0.1756976636109644, 0.1630808968255594, 0.1502720590140906, 0.1372715781798541, 0.1240798842335264, 0.1106974091587776, 0.09712458728191832, 0.08336185564284852, 0.06940965446414925, 0.05526842771475085, 0.04093862376421444, 0.02642069612328368, 0.01171510426600044, -0.003177685471666693, -0.01825719892205816, -0.03352295305060537, -0.04897445486112494, -0.0646112002478502, -0.08043267280066242, -0.09643834257019926, -0.112627664799703, -0.1290000786306327, -0.1455550057891977, -0.1622918492610786, -0.1792099919616769, -0.1963087954092871, -0.2135875984086027, -0.2310457157519598, -0.2486824369456806, 
-0.2664970249688105, -0.2844887150714383, -0.3026567136196605, -0.3210001969940831, -0.3395183105485606, -0.3582101676356449, -0.3770748487049572, -0.3961114004804047, -0.4153188352218415, -0.4346961300764191, -0.4542422265244825, -0.4739560299244546, -0.4938364091606939, -0.5138821963978383, -0.534092186944626, -0.5544651392296489, -0.5749997748909129, -0.5956947789804807, -0.6165488002848325, -0.6375604517609208, -0.6587283110872008, -0.6800509213281953, -0.7015267917104065, -0.7231543985066086, -0.7449321860247525, -0.7668585676968895, -0.7889319272626627, -0.8111506200410395, -0.8335129742830647, -0.8560172925974819, -0.8786618534401414, -0.9014449126571431, -0.9243647050706867, -0.9474194460956106, -0.9706073333735834, -0.9939265484108967, -1.017375258204767, -1.040951616842014, -1.064653767052926, -1.088479841702077, -1.112427965196777, -1.136496254792793, -1.160682821775922, -1.184985772496903, -1.209403209236145, -1.233933230873679, -1.258573933338722, -1.2833234098122, -1.308179750654618, -1.333141043030631, -1.358205370200765, -1.383370810449766, -1.40863543562018, -1.433997309218894, -1.459454484063533, -1.485004999434832, -1.510646877700368, -1.536378120374315, -1.562196703577298, -1.588100572859793, -1.614087637352057, -1.640155763203081, -1.666302766270726, -1.692526404024884, -1.718824366625327, -1.74519426713575, -1.771633630835552, -1.798139883590932, -1.824710339247097, -1.851342186003703, -1.878032471736065, -1.904778088225253, -1.93157575426091, -1.958421997581482, -1.985313135617585, -2.01224525500539, -2.039214189838331, -2.066215498626948, -2.093244439938457, -2.120295946689617, -2.147364599068652, -2.174444596064395, -2.201529725583553, -2.228613333139888, -2.255688289102368, -2.282746954492841, -2.309781145327649, -2.336782095501705, -2.363740418218131, -2.390646065971358, -2.417488289096891, -2.44425559290658, -2.470935693434314, -2.4975154718236, -2.52398092739549, -2.550317129442788, -2.576508167804507, -2.602537102283094, 
-2.628385910976061, -2.654035437603383, -2.679465337922398, -2.704654025332899, -2.729578615786863, -2.75421487212961, -2.778537148012369, -2.802518331530151, -2.826129788753571, -2.849341307338827, -2.872121040416536, -2.89443545097747, -2.916249256991514, -2.937525377515524, -2.958224880065928, -2.978306929553325, -2.997728739098628, -3.016445523073787, -3.034410452734657, -3.0515746148393, -3.06788697367179, -3.083294336919637, -3.097741325882067, -3.111170350535814, -3.123521590000843, -3.134732978832982, -3.144740199865809, -3.153476684499467, -3.160873620622505, -3.166859969215182, -3.171362490291792, -3.174305779003541, -3.175612312553783, -3.175202508920828, -3.172994798705099, -3.168918023311174, -3.162938281176024, -3.15500657109929, -3.145002864438455, -3.132842785485344, -3.118429335340455, -3.101666248349336, -3.082456357814898, -3.060701961311042, -3.036305073479516, -3.009167697814931, -2.979192124388277, -2.946281253861187, -2.910338949101777, -2.871270415589594, -2.828982611776675, -2.783384690531741, -2.734388472741461, -2.681908954074816, -2.625864845832506, -2.566179150701204, -2.502779774110725, -2.4356001717483, -2.364580033616801, -2.289666004831969, -2.210812443132069, -2.12798221282362, -2.041147514604038, -1.95029075038525, -1.855405421888773, -1.756497061390433, -1.653584192559599, -1.54669931886129, -1.435889936468064, -1.321219568060155, -1.202768813275518, -1.080636410904836, -0.9549403072092841, -0.8258187239704062, -0.6934312190617781, -0.5579597314619266, -0.4196096017084402, -0.2786105578265605, -0.1352176557548253, 0.01028783776029933, 0.1575976329140496, 0.3063756894635103, 0.456257589577102, 0.6068500274144233, 0.7577304330774656, 0.9084467496981679, 1.058517383519142, 1.207431347865658, 1.354648622876638, 1.499600753736668, 1.641691710903207, 1.78029903642361, 1.914775300852398, 2.044449895474375, 2.168631184474494, 2.286609041328183, 2.397657792970659, 2.501039594191887, 2.596008253143969, 2.68181352678618, 2.757705902474076, 
2.822941878666082, 2.876789753816583, 2.918535927891774, 2.947491715528073, 2.963000663600673, 2.964446358833789, 2.951260703023278, 2.92293262442369, 2.879017183853577, 2.819145023086885, 2.743032091132802, 2.650489571089599, 2.541433916441499, 2.415896891029956, 2.274035491581714, 2.116141615760827, 1.942651322414728, 1.754153514233988, 1.551397856717371, 1.335301731456274, 1.106956006708196, 0.8676293944586789, 0.6187711511757108, 0.3620118698015674, 0.09916210382607393, -0.1677914387780811, -0.4366923926700977, -0.7052241777607213, -0.9709226293683176, -1.231192038714177, -1.483324799680197, -1.724524822390834, -1.951934829548721, -2.162667596831441, -2.353841133435478, -2.522617722570914, -2.666246654101369, -2.782110382569821, -2.86777373383744, -2.921035663138547, -2.9399829375956, -2.92304497870929, -2.869048957158816, -2.777274086147067, -2.647503913899843, -2.48007527487967, -2.275922427638407, -2.036614790585217, -1.764386591585537, -1.462156680202465, -1.133536720097587, -0.7828259916266581, -0.4149910992648606, -0.03562900346795703, 0.3490880100707895, 0.7324865417150311, 1.107486236579308, 1.466714219696053, 1.802639609771998, 2.107727297787681, 2.374609475642965, 2.596272621644978, 2.766256806482718, 2.878863293437918, 2.929365493864828, 2.914217433649195, 2.831253025118174, 2.679868664783432, 2.461181039361324, 2.178151574807956, 1.835668763446483, 1.440579712292028, 1.001662730358629, 0.5295336691978805, 0.03648009669191317, -0.463780745271422, -0.9564218545412962, -1.425902995129514, -1.856439174610261, -2.232532582634067, -2.539555348528342, -2.76436614197829, -2.895939310325102, -2.925981147459201, -2.849504291156118, -2.665328450360983, -2.376473997216442, -1.990414755047722, -1.519157896661415, -0.979122525595033, -0.3907944710350225, 0.2218567860582173, 0.8322023232021336, 1.412009311698736, 1.93270308087348, 2.366794601022297, 2.68941544045314, 2.879885937927452, 2.923227790112885, 2.811520904356931, 2.544997727757683, 2.132767850233479, 
1.593072879201878, 0.9529875020075376, 0.2475079419799751, -0.4819963039138704, -1.189948264383619, -1.829277469637545, -2.354558440913892, -2.725395418216521, -2.909824902622844, -2.887467428062323, -2.65213571465495, -2.213603788941316 + ], + "label": "2P", + "angular_momentum": 1 + } + ], + "ps_wfc": [ + { + "index": 0, + "radial_function": [ + -0.0002330886845864572, -0.0002360205792526303, -0.0002389893526115842, -0.0002419954685400848, -0.0002450393967497467, -0.0002481216128604253, -0.0002512425984745339, -0.0002544028412522953, -0.0002576028349879383, -0.0002608430796868554, -0.0002641240816437283, -0.0002674463535216377, -0.0002708104144321675, -0.0002742167900165168, -0.0002776660125276334, -0.0002811586209133772, -0.0002846951609007327, -0.0002882761850810801, -0.0002919022529965372, -0.0002955739312273908, -0.0002992917934806234, -0.0003030564206795577, -0.0003068684010546252, -0.0003107283302352793, -0.0003146368113430632, -0.0003185944550858473, -0.0003226018798532544, -0.0003266597118132839, -0.0003307685850101503, -0.0003349291414633555, -0.0003391420312680032, -0.0003434079126963783, -0.0003477274523008024, -0.0003521013250177837, -0.000356530214273477, -0.0003610148120904694, -0.0003655558191959097, -0.0003701539451309982, -0.0003748099083618531, -0.0003795244363917728, -0.0003842982658749075, -0.0003891321427313632, -0.0003940268222637523, -0.0003989830692752107, -0.0004040016581889, -0.0004090833731690098, -0.0004142290082432859, -0.0004194393674270976, -0.0004247152648490656, -0.0004300575248782712, -0.0004354669822530638, -0.0004409444822114899, -0.0004464908806233627, -0.000452107044123991, -0.0004577938502495952, -0.0004635521875744189, -0.0004693829558495715, -0.0004752870661436153, -0.0004812654409849189, -0.0004873190145058059, -0.0004934487325885099, -0.0004996555530129721, -0.0005059404456064941, -0.0005123043923952726, -0.0005187483877578449, -0.0005252734385804571, -0.0005318805644143931, -0.0005385707976352792, 
-0.0005453451836043919, -0.0005522047808319989, -0.000559150661142749, -0.0005661839098431453, -0.0005733056258911258, -0.0005805169220677738, -0.0005878189251511932, -0.000595212776092564, -0.0006026996301944181, -0.0006102806572911541, -0.0006179570419318249, -0.0006257299835652233, -0.0006336006967272946, -0.0006415704112309072, -0.0006496403723580125, -0.0006578118410542168, -0.0006660860941258079, -0.0006744644244392499, -0.0006829481411231964, -0.0006915385697730405, -0.0007002370526580368, -0.0007090449489310329, -0.0007179636348408318, -0.000726994503947232, -0.0007361389673387697, -0.0007453984538531986, -0.0007547744103007469, -0.0007642683016901756, -0.0007738816114576875, -0.0007836158416987096, -0.0007934725134025951, -0.0008034531666902758, -0.0008135593610549005, -0.0008237926756055067, -0.0008341547093137502, -0.0008446470812637428, -0.0008552714309050331, -0.0008660294183087622, -0.0008769227244270492, -0.0008879530513556345, -0.0008991221225998286, -0.0009104316833438076, -0.0009218835007232884, -0.0009334793641016421, -0.0009452210853494766, -0.0009571104991277332, -0.0009691494631743512, -0.0009813398585945274, -0.0009936835901546367, -0.001006182586579842, -0.001018838800855452, -0.001031654210532068, -0.001044630818034563, -0.001057770650974961, -0.001071075762469232, -0.001084548231458091, -0.001098190163031819, -0.001112003688759171, -0.001125990967020425, -0.001140154183344617, -0.001154495550751019, -0.001169017310094911, -0.001183721730417697, -0.001198611109301437, -0.00121368777322782, -0.001228954077941669, -0.001244412408819013, -0.001260065181239775, -0.001275914840965166, -0.001291963864519815, -0.001308214759578706, -0.001324670065358988, -0.001341332353016698, -0.001358204226048492, -0.00137528832069841, -0.001392587306369774, -0.001410103886042253, -0.001427840796694179, -0.001445800809730175, -0.001463986731414155, -0.001482401403307777, -0.001501047702714411, -0.001519928543128676, -0.001539046874691647, -0.001558405684651782, 
-0.001578007997831636, -0.001597856877100464, -0.001617955423852745, -0.001638306778492737, -0.001658914120925127, -0.001679780671051842, -0.001700909689275118, -0.001722304477006885, -0.001743968377184564, -0.001765904774793353, -0.001788117097395069, -0.001810608815663664, -0.001833383443927434, -0.001856444540718101, -0.001879795709326745, -0.001903440598366766, -0.001927382902343914, -0.001951626362233466, -0.0019761747660647, -0.002001031949512683, -0.002026201796497519, -0.002051688239791134, -0.002077495261631664, -0.002103626894345617, -0.002130087220977805, -0.002156880375929232, -0.002184010545602998, -0.002211481969058298, -0.002239298938672688, -0.002267465800812635, -0.002295986956512523, -0.002324866862162199, -0.002354110030203133, -0.002383721029833377, -0.002413704487721341, -0.002444065088728572, -0.002474807576641622, -0.002505936754913082, -0.002537457487411984, -0.002569374699183592, -0.002601693377218768, -0.002634418571233014, -0.002667555394455274, -0.00270110902442671, -0.002735084703809456, -0.002769487741205581, -0.002804323511986345, -0.002839597459131835, -0.002875315094081233, -0.002911481997593687, -0.002948103820620065, -0.002985186285185649, -0.003022735185283889, -0.00306075638778146, -0.003099255833334626, -0.003138239537317161, -0.003177713590759932, -0.00321768416130225, -0.003258157494155247, -0.003299139913077293, -0.003340637821361701, -0.003382657702836868, -0.003425206122878914, -0.003468289729437139, -0.003511915254072281, -0.003556089513007878, -0.003600819408194816, -0.003646111928389239, -0.003691974150244055, -0.003738413239414089, -0.003785436451675165, -0.003833051134057255, -0.003881264725991796, -0.003930084760473532, -0.003979518865236843, -0.004029574763946909, -0.004080260277405844, -0.004131583324773924, -0.004183551924806236, -0.004236174197104779, -0.004289458363386344, -0.004343412748766316, -0.004398045783058539, -0.004453366002091609, -0.004509382049041599, -0.004566102675781564, -0.004623536744248013, 
-0.004681693227824461, -0.004740581212742468, -0.004800209899500152, -0.004860588604298582, -0.004921726760496192, -0.004983633920081431, -0.005046319755163921, -0.005109794059484318, -0.00517406674994313, -0.005239147868148729, -0.005305047581984768, -0.005371776187197304, -0.005439344109001794, -0.005507761903710293, -0.005577040260379047, -0.005647190002476757, -0.005718222089573796, -0.005790147619052565, -0.005862977827839367, -0.005936724094157962, -0.00601139793930511, -0.006087011029448427, -0.006163575177446716, -0.006241102344693195, -0.006319604642981784, -0.006399094336396812, -0.006479583843226413, -0.006561085737899893, -0.006643612752949392, -0.006727177780996137, -0.006811793876761552, -0.006897474259103619, -0.006984232313078702, -0.007072081592029232, -0.007161035819697537, -0.007251108892366137, -0.00734231488102487, -0.00743466803356512, -0.00752818277700153, -0.007622873719721569, -0.007718755653763194, -0.00781584355712107, -0.007914152596081622, -0.008013698127587334, -0.008114495701630605, -0.008216561063677554, -0.008319910157122178, -0.008424559125771131, -0.008530524316359651, -0.008637822281098908, -0.00874646978025517, -0.008856483784761265, -0.008967881478860623, -0.009080680262784385, -0.009194897755461957, -0.00931055179726539, -0.009427660452788074, -0.009546242013658094, -0.009666315001386712, -0.009787898170252435, -0.009911010510221014, -0.01003567124990192, -0.01016189985954167, -0.01028971605405452, -0.01041913979609088, -0.01055019129914404, -0.01068289103069561, -0.01081725971540007, -0.01095331833830912, -0.01109108814813606, -0.01123059066056088, -0.01137184766157649, -0.01151488121087655, -0.01165971364528545, -0.01180636758223104, -0.01195486592326034, -0.0121052318575992, -0.01225748886575595, -0.01241166072316997, -0.01256777150390554, -0.01272584558439147, -0.01288590764720722, -0.01304798268491594, -0.01321209600394507, -0.01337827322851501, -0.01354654030461654, -0.0137169235040374, -0.0138894494284388, 
-0.01406414501348239, -0.01424103753300824, -0.01442015460326456, -0.01460152418718958, -0.01478517459874647, -0.0149711345073117, -0.01515943294211761, -0.01535009929674982, -0.01554316333369999, -0.01573865518897483, -0.01593660537676179, -0.01613704479415225, -0.01634000472592284, -0.01654551684937547, -0.01675361323923694, -0.01696432637261873, -0.01717768913403764, -0.01739373482049807, -0.01761249714663656, -0.01783401024992939, -0.01805830869596398, -0.01828542748377467, -0.018515402051244, -0.01874826828056957, -0.01898406250379805, -0.01922282150842647, -0.01946458254307185, -0.01970938332320988, -0.01995726203698341, -0.02020825735108161, -0.0204624084166905, -0.02071975487551574, -0.02098033686587847, -0.02124419502888484, -0.02151137051467041, -0.02178190498871993, -0.02205584063826341, -0.02233322017874954, -0.02261408686039676, -0.02289848447482359, -0.02318645736175839, -0.02347805041582981, -0.02377330909343877, -0.02407227941971247, -0.02437500799554191, -0.02468154200470332, -0.02499192922106458, -0.02530621801587758, -0.02562445736515716, -0.02594669685714786, -0.02627298669987919, -0.02660337772881034, -0.02693792141456541, -0.02727666987075966, -0.02761967586191827, -0.02796699281148811, -0.02831867480994356, -0.02867477662298739, -0.02903535369984763, -0.029400462181671, -0.0297701589100145, -0.0301445014354355, -0.03052354802618149, -0.0309073576769806, -0.03129599011793334, -0.03168950582350705, -0.03208796602163361, -0.03249143270291147, -0.03289996862991298, -0.03331363734659779, -0.03373250318783332, -0.03415663128902335, -0.03458608759584536, -0.03502093887409785, -0.03546125271965809, -0.03590709756855176, -0.03635854270713487, -0.0368156582823891, -0.03727851531233147, -0.03774718569653873, -0.03822174222678806, -0.03870225859781432, -0.03918880941818479, -0.0396814702212925, -0.04018031747646841, -0.04068542860021375, -0.04119688196755287, -0.04171475692350757, -0.04223913379469366, -0.04277009390103988, -0.04330771956763084, 
-0.04385209413667366, -0.04440330197958954, -0.04496142850923061, -0.04552656019222247, -0.04609878456143333, -0.04667819022856991, -0.04726486689690063, -0.04785890537410688, -0.04846039758526172, -0.04906943658593779, -0.04968611657544347, -0.05031053291018824, -0.05094278211717731, -0.05158296190763522, -0.0522311711907593, -0.0528875100876023, -0.05355207994508443, -0.05422498335013516, -0.05490632414396347, -0.0555962074364576, -0.05629473962071331, -0.05700202838769033, -0.05771818274099683, -0.05844331301180068, -0.05917753087386795, -0.0599209493587269, -0.06067368287095722, -0.06143584720360362, -0.06220755955371206, -0.0629889385379885, -0.06378010420857819, -0.06458117806896431, -0.06539228308998479, -0.06621354372596473, -0.067045085930964, -0.06788703717513696, -0.06873952646120285, -0.06960268434102451, -0.07047664293229261, -0.07136153593531361, -0.0722574986498981, -0.07316466799234676, -0.07408318251253115, -0.07501318241106505, -0.07595480955656413, -0.07690820750298917, -0.07787352150706926, -0.07885089854580091, -0.07984048733401786, -0.08084243834202778, -0.08185690381331052, -0.08288403778227206, -0.08392399609204959, -0.08497693641236073, -0.08604301825739177, -0.08712240300371742, -0.08821525390824574, -0.08932173612618131, -0.09044201672899758, -0.09157626472241234, -0.0927246510643565, -0.09388734868292767, -0.09506453249431995, -0.09625637942071863, -0.0974630684081515, -0.09868478044428469, -0.09992169857615214, -0.101174007927808, -0.1024418957178869, -0.1037255512770635, -0.1050251660653932, -0.1063409336895232, -0.1076730499197587, -0.1090217127069663, -0.1103871221993026, -0.1117694807587473, -0.1131689929774259, -0.1145858656937024, -0.1160203080080225, -0.1174725312984887, -0.1189427492361448, -0.1204311777999493, -0.1219380352914144, -0.1234635423488861, -0.1250079219614423, -0.126571399482382, -0.128154202642279, -0.1297565615615724, -0.1313787087626632, -0.1330208791814897, -0.1346833101785462, -0.1363662415493155, 
-0.1380699155340786, -0.1397945768270663, -0.1415404725849162, -0.1433078524343963, -0.1450969684793532, -0.1469080753068463, -0.1487414299924202, -0.150597292104474, -0.1524759237076773, -0.1543775893653857, -0.1563025561410043, -0.1582510935982448, -0.1602234738002243, -0.1622199713073448, -0.1642408631738972, -0.1662864289433262, -0.1683569506420915, -0.1704527127720605, -0.1725740023013619, -0.1747211086536288, -0.1768943236955563, -0.1790939417226967, -0.1813202594434113, -0.1835735759608968, -0.1858541927531968, -0.188162413651113, -0.1904985448139181, -0.1928628947027777, -0.1952557740517791, -0.1976774958364629, -0.2001283752397522, -0.2026087296151634, -0.2051188784471871, -0.2076591433087176, -0.2102298478154055, -0.212831317576808, -0.2154638801441989, -0.2181278649549051, -0.2208236032730236, -0.2235514281263714, -0.2263116742395172, -0.229104677962732, -0.2319307771966998, -0.2347903113128142, -0.237683621068888, -0.2406110485200935, -0.2435729369249439, -0.246569630646124, -0.2496014750459691, -0.2526688163763822, -0.2557720016629793, -0.2589113785832354, -0.2620872953384092, -0.2653001005190049, -0.2685501429635284, -0.27183777161029, -0.2751633353419854, -0.2785271828227964, -0.2819296623277274, -0.2853711215638969, -0.2888519074834877, -0.2923723660880517, -0.2959328422238625, -0.2995336793679856, -0.3031752194047431, -0.3068578023922267, -0.3105817663185087, -0.3143474468471918, -0.3181551770519246, -0.322005287139501, -0.3258981041611524, -0.3298339517116251, -0.3338131496156362, -0.3378360136012744, -0.3419028549599163, -0.3460139801922071, -0.3501696906396461, -0.3543702821013105, -0.3586160444352332, -0.3629072611439416, -0.3672442089436564, -0.3716271573166284, -0.3760563680460937, -0.3805320947333042, -0.3850545822960855, -0.3896240664483637, -0.3942407731600862, -0.3989049180969602, -0.4036167060394149, -0.4083763302801839, -0.4131839719999048, -0.4180397996201038, -0.4229439681329514, -0.4278966184071431, -0.4328978764692658, 
-0.4379478527600029, -0.4430466413645154, -0.4481943192163492, -0.4533909452741974, -0.4586365596708565, -0.4639311828337069, -0.4692748145760497, -0.474667433158639, -0.4801089943207417, -0.4855994302800704, -0.4911386487009395, -0.4967265316299978, -0.5023629343989148, -0.5080476844933953, -0.5137805803879281, -0.5195613903456827, -0.525389851182994, -0.5312656669979037, -0.5371885078622478, -0.5431580084768214, -0.5491737667891807, -0.5552353425736847, -0.5613422559734312, -0.5674939860037834, -0.5736899690172406, -0.579929597129476, -0.5862122166064149, -0.5925371262123225, -0.5989035755189289, -0.6053107631757196, -0.6117578351416145, -0.6182438828783461, -0.6247679415059855, -0.6313289879211583, -0.6379259388786387, -0.644557649037146, -0.6512229089703075, -0.6579204431439408, -0.6646489078609412, -0.6714068891752777, -0.6781929007767925, -0.6850053818486876, -0.6918426948998483, -0.6987031235743459, -0.7055848704407548, -0.7124860547641804, -0.7194047102641572, -0.7263387828619294, -0.7332861284208974, -0.7402445104843873, -0.7472115980152656, -0.754184963142258, -0.7611620789182957, -0.7681403170965719, -0.7751169459304722, -0.782089128004005, -0.7890539180998098, -0.7960082611123865, -0.8029489900146634, -0.8098728238866137, -0.8167763660152029, -0.8236561020755258, -0.830508398403657, -0.8373295003723384, -0.8441155308813336, -0.8508624889749525, -0.8575662485999497, -0.8642225575177567, -0.8708270363857242, -0.8773751780228364, -0.8838623468761386, -0.8902837787048818, -0.8966345805002336, -0.902909730659167, -0.9091040794319692, -0.9152123496636352, -0.9212291378501668, -0.9271489155316506, -0.9329660310447062, -0.9386747116576851, -0.9442690661127143, -0.9497430875993429, -0.9550906571852223, -0.9603055477297954, -0.9653814283075028, -0.9703118691674547, -0.9750903472568325, -0.9797102523355619, -0.984164893709875, -0.988447507612394, -0.9925512652561871, -0.9964692815898843, -1.000194624780438, -1.003720326449325, -1.007039392687019, 
-1.010144815869304, -1.013029587297446, -1.015686710682395, -1.018109216490978, -1.020290177169447, -1.022222723256776, -1.023900060396639, -1.025315487253097, -1.026462414330579, -1.027334383693773, -1.027925089577416, -1.028228399869828, -1.028238378447083, -1.027949308327149, -1.027355715604982, -1.026452394120397, -1.025234430800588, -1.023697231608329, -1.021836548015171, -1.019648503906256, -1.017129622809791, -1.014276855329626, -1.011087606643771, -1.007559763915152, -1.003691723443309, -0.9994824173671571, -0.994931339709467, -0.9900385715332204, -0.9848048049587191, -0.9792313657682049, -0.9733202343019056, -0.9670740643260627, -0.9604961995295577, -0.953590687281673, -0.9463622892592307, -0.9388164885272727, -0.9309594926337897, -0.9227982322559974, -0.9143403549138438, -0.9055942132460002, -0.8965688473251332, -0.887273960473269, -0.8777198880250062, -0.8679175584769996, -0.8578784464570357, -0.8476145169460249, -0.8371381601921465, -0.8264621167689746, -0.8155993922498237, -0.8045631609996076, -0.7933666586243879, -0.7820230626686686, -0.7705453612123715, -0.7589462090948266, -0.7472377715829961, -0.7354315554070087, -0.7235382272091838, -0.7115674195941801, -0.6995275251292407, -0.687425478825591, -0.6752665298361491, -0.6630540033317782, -0.6507907576164551, -0.6384848744604239, -0.6261458576861592, -0.6137831146648203, -0.6014059462080241, -0.5890235367085978, -0.5766449445402697, -0.5642790927257598, -0.5519347598822416, -0.539620571452702, -0.527344991231283, -0.5151163131902834, -0.502942653616112, -0.4908319435611176, -0.4787919216178527, -0.4668301270220611, -0.4549538930902748, -0.4431703409976512, -0.4314863739014124, -0.4199086714149433, -0.4084436844373249, -0.3970976303428134, -0.3858764885345013, -0.3747859963661168, -0.3638316454356407, -0.3530186782541189, -0.3423520852927535, -0.3318366024110249, -0.3214767086682693, -0.3112766245207706, -0.3012403104060475, -0.2913714657156121, -0.2816735281570417, -0.2721496735057441, 
-0.2628028157463104, -0.2536356076028238, -0.2446504414569433, -0.2358494506519983, -0.2272345111807139, -0.218807243753543, -0.2105690162439051, -0.2025209465059291, -0.1946639055595672, -0.1869985211371892, -0.1795251815849956, -0.1722440401117836, -0.1651550193767937, -0.1582578164075369, -0.1515519078376674, -0.1450365554541306, -0.1387108120419742, -0.1325735275143796, -0.1266233553146441, -0.1208587590760343, -0.115278019524645, -0.1098792416096259, -0.104660361844411, -0.0996191558418796, -0.09475324602572505, -0.09006010949969043, -0.08553708605577222, -0.08118138630198585, -0.07699009988984339, -0.07296020382131337, -0.0690885708147223, -0.06537197770881993, -0.061807113884068, -0.05839058968013044, -0.05511894478854148, -0.05198865659961041, -0.04899614848278937, -0.04613779797998339, -0.04340994489162108, -0.04080889923572972, -0.0383309490607684, -0.03597236809356671, -0.03372942320439187, -0.03159838167192185, -0.02957551823173176, -0.02765712189280381, -0.02583950250754074, -0.02411899708179608, -0.02249197581252554, -0.02095484784180638, -0.01950406671716069, -0.01813613554934628, -0.01684761186003928, -0.01563511211311852, -0.01449531592456554, -0.01342496994730835, -0.01242089142865453, -0.01147997143927182, -0.01059917777397489, -0.00977555752585762, -0.009006239336563658, -0.008288435326707434, -0.00761944271163588, -0.00699664510885134, -0.006417513544492758, -0.005879607167288864, -0.005380573679348868, -0.004918149494038228, -0.00449015963199505, -0.004094517367073174, -0.003729223634647816, -0.003392366215286457, -0.003082118707269776, -0.002796739301843662, -0.002534569375393368, -0.002294031912954813, -0.002073629777616881, -0.001871943840423704, -0.001687630985359436, -0.001519422003892729, -0.001366119393376954, -0.001226595073349166, -0.001099788033449769, -0.0009847019263005425, -0.0008804026182359821, -0.0007860157102869857, -0.0007007240412722902, -0.0006237651842673734, -0.0005544289470985554, -0.0004920548868576654, 
-0.0004360298477557288, -0.0003857855309385741, -0.0003407961041787968, -0.0003005758586427772, -0.0002646769192138927, -0.0002326870141388746, -0.0002042273090584041, -0.0001789503097901307, -0.0001565378375566435, -0.0001366990796964564, -0.000119168718266327, -0.0001037051383413707, -9.00887172481754e-05, -7.812019542779851e-05, -6.761912912201281e-05, -5.842242460895296e-05, -5.038295328445776e-05, -4.336824649358246e-05, -3.725926866325817e-05, -3.194926697183306e-05, -2.734269551383481e-05, -2.335421167802075e-05, -1.990774225262833e-05, -1.693561660241775e-05, -1.437776412625567e-05, -1.218097309920702e-05, -1.029820793018665e-05, -8.687981816674794e-06, -7.313781759850268e-06, -6.143542905433543e-06, -5.149169198587179e-06, -4.306097384719733e-06, -3.592901449294358e-06, -2.990934653795811e-06, -2.484006526499038e-06, -2.058091171240737e-06, -1.701071112250602e-06, -1.402484988303688e-06, -1.153359040157857e-06, -9.459948844362916e-07, -7.73785825292214e-07, -6.310838740916452e-07, -5.130749032480125e-07, -4.156632415238565e-07, -3.353698206170322e-07, -2.692424049643669e-07, -2.147758159144143e-07, -1.698443105045081e-07, -1.326278093295039e-07, -1.017341274336734e-07, -7.505454182638256e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 1, + "radial_function": [ + -0.0002000516968052631, -0.0002025680373076671, -0.0002051160294744087, -0.0002076960714343096, -0.0002103085663240318, -0.0002129539223510678, -0.0002156325528575241, -0.0002183448763847063, -0.0002210913167385167, -0.0002238723030556755, -0.0002266882698707728, -0.0002295396571841658, -0.0002324269105307295, -0.0002353504810494721, -0.0002383108255540268, -0.0002413084066040281, -0.0002443436925773883, -0.0002474171577434826, -0.0002505292823372531, -0.0002536805526342476, -0.0002568714610265995, -0.0002601025060999655, -0.0002633741927114299, -0.0002666870320683889, -0.0002700415418084287, -0.0002734382460802049, -0.0002768776756253431, -0.0002803603678613673, -0.0002838868669656712, -0.0002874577239605487, -0.000291073496799289, -0.0002947347504533594, -0.0002984420570006819, -0.0003021959957150207, -0.0003059971531564959, -0.0003098461232632313, -0.0003137435074441607, -0.0003176899146729962, -0.0003216859615833825, -0.0003257322725652462, -0.0003298294798623565, -0.0003339782236711142, -0.0003381791522405832, -0.0003424329219737789, -0.0003467401975302341, -0.0003511016519298488, -0.0003555179666580528, -0.0003599898317722873, -0.0003645179460098266, -0.0003691030168969573, -0.0003737457608595273, -0.0003784469033348902, -0.0003832071788852534, -0.0003880273313124542, -0.0003929081137741808, -0.0003978502889016499, -0.0004028546289187711, -0.0004079219157628055, -0.000413052941206544, -0.0004182485069820224, -0.0004235094249057906, -0.0004288365170057603, -0.0004342306156496462, -0.0004396925636750232, -0.0004452232145210211, -0.0004508234323616719, -0.0004564940922409385, -0.0004622360802094397, -0.0004680502934628943, -0.0004739376404823096, -0.0004798990411759289, -0.0004859354270229679, -0.0004920477412191572, -0.0004982369388241161, -0.0005045039869105807, -0.0005108498647155066, -0.0005172755637930751, 
-0.0005237820881696219, -0.0005303704545005151, -0.0005370416922290077, -0.000543796843747085, -0.0005506369645583376, -0.0005575631234428829, -0.0005645764026243599, -0.0005716778979390257, -0.0005788687190069767, -0.0005861499894055259, -0.0005935228468447592, -0.0006009884433453006, -0.000608547945418314, -0.0006162025342477661, -0.0006239534058749846, -0.0006318017713855378, -0.0006397488570984616, -0.000647795904757872, -0.0006559441717269802, -0.0006641949311845549, -0.0006725494723238513, -0.0006810091005540441, -0.0006895751377041946, -0.0006982489222297795, -0.0007070318094218222, -0.0007159251716186501, -0.0007249303984203193, -0.0007340488969057341, -0.0007432820918524946, -0.0007526314259595158, -0.00076209836007244, -0.0007716843734118888, -0.0007813909638045855, -0.0007912196479173805, -0.0008011719614942245, -0.0008112494595961201, -0.000821453716844091, -0.0008317863276652109, -0.0008422489065417184, -0.0008528430882632735, -0.0008635705281823827, -0.0008744329024730366, -0.0008854319083926031, -0.0008965692645470087, -0.0009078467111592618, -0.0009192660103413471, -0.0009308289463695444, -0.0009425373259632087, -0.0009543929785670516, -0.0009663977566369813, -0.0009785535359295291, -0.0009908622157949229, -0.001003325719473844, -0.001015945994397912, -0.001028725012493953, -0.001041664770492096, -0.00105476729023774, -0.001068034619007446, -0.001081468829828801, -0.001095072021804306, -0.001108846320439337, -0.001122793877974228, -0.001136916873720538, -0.001151217514401532, -0.001165698034496959, -0.001180360696592157, -0.001195207791731555, -0.00121024163977661, -0.001225464589768257, -0.001240879020293909, -0.001256487339859073, -0.001272291987263644, -0.001288295431982929, -0.001304500174553455, -0.001320908746963639, -0.001337523713049368, -0.001354347668894544, -0.001371383243236687, -0.001388633097877609, -0.001406099928099278, -0.001423786463084898, -0.001441695466345286, -0.001459829736150614, -0.001478192105967573, -0.001496785444902039, 
-0.001515612658147307, -0.001534676687437951, -0.00155398051150941, -0.00157352714656332, -0.00159331964673874, -0.001613361104589261, -0.001633654651566142, -0.001654203458507515, -0.001675010736133719, -0.001696079735548902, -0.001717413748748889, -0.001739016109135464, -0.001760890192037103, -0.001783039415236251, -0.00180546723950325, -0.001828177169136946, -0.001851172752512119, -0.001874457582633794, -0.00189803529769849, -0.001921909581662573, -0.001946084164817709, -0.001970562824373575, -0.001995349385047898, -0.002020447719663883, -0.002045861749755195, -0.002071595446178502, -0.002097652829733739, -0.00212403797179218, -0.002150754994932367, -0.002177808073584078, -0.002205201434680349, -0.002232939358317716, -0.002261026178424752, -0.002289466283438981, -0.002318264116992349, -0.002347424178605255, -0.00237695102438934, -0.002406849267759106, -0.00243712358015244, -0.002467778691760261, -0.00249881939226526, -0.00253025053158998, -0.002562077020654274, -0.00259430383214226, -0.002626936001278966, -0.002659978626616675, -0.002693436870831185, -0.002727315961528072, -0.002761621192059047, -0.002796357922348632, -0.002831531579731156, -0.002867147659798302, -0.002903211727257305, -0.002939729416799891, -0.002976706433982192, -0.003014148556115646, -0.003052061633169136, -0.003090451588682448, -0.003129324420691169, -0.003168686202663262, -0.00320854308444734, -0.003248901293232883, -0.003289767134522522, -0.003331146993116472, -0.003373047334109416, -0.003415474703899822, -0.003458435731211979, -0.003501937128130857, -0.003545985691149925, -0.00359058830223219, -0.003635751929884477, -0.00368148363024524, -0.003727790548186033, -0.003774679918426745, -0.00382215906666492, -0.003870235410719168, -0.003918916461686958, -0.003968209825116961, -0.004018123202196031, -0.004068664390951173, -0.0041198412874665, -0.004171661887115511, -0.00422413428580881, -0.004277266681257459, -0.004331067374252208, -0.004385544769958728, -0.004440707379229109, 
-0.004496563819929807, -0.004553122818286214, -0.004610393210244115, -0.004668383942848178, -0.004727104075637753, -0.004786562782060147, -0.004846769350901603, -0.00490773318773624, -0.004969463816393101, -0.00503197088044163, -0.00509526414469572, -0.005159353496736616, -0.005224248948454894, -0.005289960637611728, -0.00535649882941974, -0.005423873918143614, -0.005492096428720758, -0.005561177018402255, -0.00563112647841434, -0.005701955735640676, -0.005773675854325682, -0.005846298037799158, -0.0059198336302225, -0.00599429411835673, -0.006069691133352659, -0.006146036452563433, -0.006223342001379698, -0.006301619855087777, -0.006380882240750985, -0.006461141539114508, -0.006542410286534081, -0.006624701176928705, -0.00670802706375782, -0.006792400962023104, -0.006877836050295305, -0.006964345672766365, -0.007051943341327107, -0.007140642737670935, -0.007230457715423678, -0.007321402302300058, -0.007413490702287036, -0.00750673729785435, -0.007601156652192648, -0.007696763511479456, -0.007793572807173404, -0.00789159965833705, -0.007990859373988544, -0.008091367455482666, -0.008193139598921408, -0.0082961916975946, -0.00840053984445087, -0.008506200334599333, -0.008613189667842362, -0.00872152455123986, -0.008831221901705351, -0.00894229884863436, -0.009054772736565356, -0.009168661127873788, -0.009283981805499488, -0.009400752775707925, -0.009518992270885696, -0.009638718752370632, -0.009759950913317002, -0.009882707681596147, -0.01000700822273307, -0.01013287194287932, -0.01026031849182266, -0.01038936776603395, -0.01052003991175164, -0.01065235532810442, -0.01078633467027235, -0.01092199885268708, -0.01105936905227144, -0.01119846671171904, -0.01133931354281425, -0.01148193152979304, -0.01162634293274525, -0.0117725702910586, -0.01192063642690514, -0.01207056444877046, -0.01222237775502628, -0.01237610003754689, -0.01253175528536983, -0.01268936778840157, -0.01284896214116849, -0.01301056324661373, -0.01317419631994064, -0.01333988689250291, 
-0.01350766081574252, -0.01367754426517548, -0.0138495637444263, -0.01402374608931164, -0.01420011847197347, -0.01437870840506272, -0.0145595437459736, -0.01474265270112943, -0.01492806383032043, -0.01511580605109391, -0.01530590864319786, -0.0154984012530781, -0.01569331389842977, -0.01589067697280383, -0.01609052125026893, -0.01629287789012961, -0.01649777844170102, -0.01670525484914121, -0.01691533945634126, -0.01712806501187396, -0.01734346467400187, -0.01756157201574513, -0.01778242103000971, -0.01800604613477693, -0.01823248217835455, -0.01846176444469037, -0.01869392865874892, -0.01892901099195167, -0.01916704806768185, -0.01940807696685395, -0.01965213523354918, -0.01989926088071698, -0.0201494923959437, -0.02040286874728887, -0.0206594293891896, -0.0209192142684342, -0.02118226383020521, -0.02144861902419282, -0.02171832131077932, -0.02199141266729492, -0.02226793559434612, -0.02254793312221689, -0.0228314488173435, -0.02311852678886369, -0.02340921169524059, -0.02370354875096246, -0.02400158373331856, -0.02430336298925194, -0.02460893344228977, -0.02491834259955203, -0.02523163855883872, -0.02554887001579688, -0.02587008627116752, -0.02619533723811338, -0.02652467344962802, -0.02685814606602682, -0.02719580688252065, -0.02753770833687258, -0.02788390351713843, -0.02823444616949168, -0.02858939070613307, -0.02894879221328583, -0.02931270645927677, -0.02968118990270393, -0.03005429970069128, -0.03043209371723071, -0.03081463053161229, -0.03120196944694273, -0.03159417049875286, -0.03199129446369451, -0.03239340286832672, -0.03280055799799246, -0.0332128229057856, -0.03363026142160856, -0.03405293816132132, -0.03448091853598138, -0.03491426876117565, -0.03535305586644397, -0.03579734770479466, -0.03624721296231228, -0.03670272116785744, -0.0371639427028591, -0.03763094881119913, -0.03810381160918921, -0.03858260409564016, -0.03906740016202302, -0.03955827460272264, -0.04005530312538279, -0.0405585623613429, -0.04106812987616611, -0.04158408418025798, 
-0.04210650473957583, -0.04263547198642794, -0.043171067330362, -0.04371337316914255, -0.04426247289981592, -0.04481845092986288, -0.04538139268843727, -0.04595138463769002, -0.04652851428417771, -0.04711287019035382, -0.04770454198614232, -0.04830362038059142, -0.04891019717360685, -0.04952436526776224, -0.05014621868018549, -0.05077585255451929, -0.05141336317295355, -0.05205884796832791, -0.05271240553630234, -0.05337413564759268, -0.05404413926026962, -0.05472251853211788, -0.05540937683305269, -0.05610481875759141, -0.05680895013737547, -0.05752187805374132, -0.05824371085033522, -0.05897455814576905, -0.0597145308463128, -0.06046374115861937, -0.06122230260247812, -0.06199033002359152, -0.06276793960637049, -0.06355524888674383, -0.06435237676497482, -0.0651594435184813, -0.06597657081465158, -0.06680388172365075, -0.0676415007312109, -0.06848955375139769, -0.06934816813934658, -0.07021747270396123, -0.07109759772056544, -0.0719886749435013, -0.0728908376186634, -0.0738042204959617, -0.07472895984170207, -0.07566519345087508, -0.0766130606593429, -0.07757270235591235, -0.07854426099428383, -0.07952788060486345, -0.08052370680642586, -0.08153188681761556, -0.08255256946827147, -0.08358590521056225, -0.08463204612991644, -0.0856911459557323, -0.08676336007185124, -0.08784884552677738, -0.0889477610436266, -0.09006026702978605, -0.09118652558626496, -0.09232670051671779, -0.0934809573361169, -0.09464946327905549, -0.09583238730765667, -0.09702990011906568, -0.09824217415250087, -0.09946938359583744, -0.1007117043916982, -0.1019693142430234, -0.1032423926180902, -0.1045311207549536, -0.1058356816652757, -0.1071562601375119, -0.1084930427394209, -0.1098462178198626, -0.111215975509848, -0.112602507722802, -0.1140060081540026, -0.115426672279152, -0.1168646973520402, -0.1183202824012546, -0.1197936282258911, -0.1212849373902185, -0.1227944142172467, -0.1243222647811471, -0.1258686968984723, -0.1274339201181186, -0.1290181457099749, -0.1306215866521971, 
-0.1322444576170467, -0.1338869749552292, -0.135549356678664, -0.1372318224416186, -0.138934593520134, -0.1406578927896657, -0.1424019447008656, -0.1441669752534208, -0.1459532119678699, -0.1477608838553067, -0.1495902213848856, -0.1514414564490328, -0.1533148223262683, -0.1552105536415406, -0.1571288863239672, -0.159070057561879, -0.161034305755053, -0.1630218704640218, -0.1650329923563395, -0.1670679131496826, -0.169126875551657, -0.1712101231961801, -0.1733179005763019, -0.175450452973324, -0.1776080263820698, -0.1797908674321561, -0.1819992233051101, -0.184233341647168, -0.1864934704775924, -0.1887798580923324, -0.1910927529628498, -0.1934324036299287, -0.195799058592274, -0.1981929661897073, -0.2006143744807543, -0.2030635311144147, -0.2055406831959, -0.2080460771461136, -0.2105799585546447, -0.2131425720260386, -0.2157341610190987, -0.2183549676789668, -0.2210052326617215, -0.2236851949512286, -0.2263950916679663, -0.2291351578695393, -0.2319056263425916, -0.2347067273858152, -0.2375386885837448, -0.2404017345710204, -0.2432960867867885, -0.2462219632189075, -0.2491795781376067, -0.2521691418182485, -0.2551908602528234, -0.2582449348498085, -0.2613315621219995, -0.2644509333619262, -0.267603234304445, -0.2707886447760963, -0.2740073383308008, -0.2772594818714638, -0.2805452352570404, -0.283864750894611, -0.287218173316002, -0.2906056387384802, -0.2940272746090364, -0.2974831991317668, -0.3009735207778506, -0.3044983377776153, -0.3080577375941654, -0.3116517963780538, -0.3152805784024512, -0.3189441354782783, -0.3226425063487434, -0.3263757160627312, -0.3301437753264774, -0.3339466798329574, -0.3377844095684193, -0.3416569280954775, -0.3455641818121883, -0.349506099186523, -0.3534825899656496, -0.3574935443594435, -0.361538832197638, -0.3656183020600356, -0.3697317803792018, -0.3738790705150664, -0.3780599518008736, -0.3822741785599192, -0.3865214790925341, -0.390801554632783, -0.3951140782743598, -0.3994586938651882, -0.4038350148702466, -0.4082426232021688, 
-0.4126810680191932, -0.417149864490065, -0.4216484925255339, -0.4261763954761184, -0.4307329787958588, -0.4353176086718223, -0.4399296106191691, -0.4445682680416548, -0.4492328207574947, -0.453922463490585, -0.4586363443271476, -0.4633735631379394, -0.4681331699662539, -0.4729141633820332, -0.4777154888025004, -0.4825360367798353, -0.4873746412565191, -0.4922300777891044, -0.4971010617412864, -0.5019862464472957, -0.5068842213467785, -0.5117935100924811, -0.5167125686322346, -0.5216397832669017, -0.5265734686861401, -0.5315118659840394, -0.5364531406568911, -0.5413953805855894, -0.5463365940053778, -0.551274707465916, -0.5562075637849099, -0.5611329199987966, -0.5660484453143004, -0.5709517190649441, -0.5758402286769353, -0.5807113676491826, -0.5855624335525147, -0.5903906260535696, -0.5951930449691647, -0.5999666883573563, -0.6047084506518119, -0.6094151208464978, -0.6140833807381585, -0.6187098032344553, -0.6232908507361188, -0.6278228736019239, -0.6323021087057451, -0.6367246780954686, -0.6410865877639883, -0.6453837265430268, -0.6496118651310191, -0.6537666552667709, -0.6578436290611374, -0.6618381984994187, -0.6657456551276874, -0.6695611699367207, -0.6732797934576707, -0.6768964560840594, -0.6804059686350911, -0.6838030231756753, -0.6870821941089119, -0.6902379395570993, -0.6932646030476176, -0.6961564155202375, -0.6989074976725851, -0.70151186266058, -0.703963419170668, -0.7062559748806254, -0.7083832403255174, -0.7103388331851452, -0.7121162830089204, -0.7137090363935844, -0.7151104626285505, -0.7163138598228254, -0.7173124615265032, -0.7180994438586781, -0.7186679331522747, -0.7190110141247589, -0.7191217385819259, -0.718993134659967, -0.718618216608782, -0.717989995117002, -0.7171014881764249, -0.7159457324805096, -0.7145157953482459, -0.7128047871610544, -0.7108058742964392, -0.7085122925378295, -0.7059173609354847, -0.703014496088415, -0.699797226812073, -0.696259209151051, -0.6923942416901987, -0.6881962811115099, -0.6836594579377794, 
-0.6787780923974728, -0.6735467103385465, -0.6679600591120431, -0.6620131233393894, -0.6557011404703409, -0.6490196160316356, -0.6419643384597172, -0.634531393404395, -0.6267171773842654, -0.6185184106691471, -0.609932149259883, -0.6009557958318571, -0.5915871095054939, -0.5818242143052716, -0.5716656061684238, -0.5611101583658431, -0.550157125201122, -0.5388061438591423, -0.52705723428387, -0.5149107969759814, -0.5023676086151689, -0.4894288154298537, -0.4760959242586579, -0.4623707912741359, -0.4482556083698804, -0.4337528872478921, -0.4188654412842906, -0.4035963652982343, -0.3879490134019482, -0.3719269751689627, -0.3555340504234512, -0.3387742230261367, -0.3216516341113905, -0.304170555316264, -0.2863353626347092, -0.2681505116291211, -0.2496205148361035, -0.2307499223130926, -0.2115433063867182, -0.1920052517809733, -0.1721403524222287, -0.1519532163371771, -0.1314484801764973, -0.110630835009467, -0.08950506513948327, -0.0680761017845733, -0.04634909354664231, -0.02432949565386893, -0.00202317999815349, 0.02056343200204807, 0.04342321170904803, 0.06654813880544937, 0.08992930118132007, 0.1135577351913601, 0.1374244216155619, 0.1615201718081993, 0.1858356290835378, 0.2103612700185566, 0.2350874056810136, 0.2600041827920362, 0.2851015848332275, 0.3103694331090283, 0.3357973877757702, 0.3613749488494883, 0.387091457205131, 0.4129360955803101, 0.4388978895971636, 0.4649657088162737, 0.4911282678368701, 0.5173741274577347, 0.5436916959133814, 0.5700692302001829, 0.5964948375070233, 0.6229564767650037, 0.6494419603305343, 0.6759389558158937, 0.7024349880810022, 0.7289174413997411, 0.7553735618136643, 0.7817904596853873, 0.8081551124632975, 0.8344543676685315, 0.8606749461143814, 0.8868034453674609, 0.9128263434590498, 0.9387300028540799, 0.9645006746842021, 0.9901245032503004, 1.015587530798707, 1.040875702574195, 1.065974872151634, 1.090870807046936, 1.115549194606669, 1.139995648174401, 1.164195713530527, 1.18813487560099, 1.211798565428982, 1.235172167402352, 
1.258241026728088, 1.280990457143938, 1.303405748855879, 1.325472176688844, 1.347175008436865, 1.368499513397494, 1.389430971074219, 1.409954680029364, 1.430055966868913, 1.449720195339594, 1.468932775517619, 1.487679173067512, 1.505944918548674, 1.523715616746525, 1.540976956004445, 1.557714717532153, 1.573914784665695, 1.589563152053886, 1.604645934745782, 1.61914937715367, 1.633059861866079, 1.646363918285426, 1.65904823106524, 1.671099648322307, 1.682505189599645, 1.693252053556956, 1.703327625366089, 1.712719483790044, 1.721415407925285, 1.729403383588471, 1.736671609330242, 1.743208502060372, 1.749002702270495, 1.754043078842596, 1.758318733433683, 1.761819004429379, 1.764533470461747, 1.766451953489288, 1.767564521439938, 1.767861490420868, 1.767333426502044, 1.765971147083807, 1.763765721862138, 1.760708473408894, 1.756790977387922, 1.752005062431827, 1.746342809708056, 1.73979655220697, 1.732358873788699, 1.724022608029734, 1.714780836914468, 1.704626889421158, 1.693554340056166, 1.681557007394636, 1.668628952690158, 1.654764478620318, 1.639958128239363, 1.624204684213509, 1.607499168418637, 1.589836841984306, 1.571213205872051, 1.551624002079893, 1.531065215568803, 1.509533077010524, 1.487024066459637, 1.463534918056045, 1.439062625867122, 1.41360445098161, 1.387157929969905, 1.359720884827664, 1.331291434521629, 1.301868008258218, 1.271449360596692, 1.24003458852964, 1.207623150653984, 1.174214888555792, 1.139810050531771, 1.104409317769435, 1.068013833106546, 1.030625232488476, 0.9922456792396311, 0.952877901261965, 0.9125252312698523, 0.8711916501661876, 0.828881833659456, 0.785601202215668, 0.7413559744324302, 0.6961532239149881, 0.6500009397257805, 0.6029080904698609, 0.5548846920684043, 0.5059418792613958, 0.4560919808684225, 0.4053485988232192, 0.3537266909831918, 0.3012426576994875, 0.2479144321162487, 0.1937615741493917, 0.1388053680755301, 0.08306892364043116, 0.02657728057356776, -0.03064248362917111, -0.0885611428188304, -0.1471472083951644, 
-0.2063668128205946, -0.2661835907016181, -0.3265585577175626, -0.3874499877280915, -0.4488132884416094, -0.5106008760235008, -0.5727620491134915, -0.6352428627498634, -0.6979860027527585, -0.7609306611353356, -0.8240124132397566, -0.8871630972862469, -0.9503143891562629, -1.013405945717686, -1.07637029445945, -1.139115600808456, -1.201556146163991, -1.263598461412328, -1.325144971947503, -1.386093419457548, -1.446336850163621, -1.505763578566488, -1.564257164081952, -1.621696403883224, -1.67795534335164, -1.732903305936643, -1.786404944284953, -1.838320314595545, -1.888504976249755, -1.93681011885642, -1.983082718939349, -2.027165728577448, -2.068898298385222, -2.108116037291678, -2.144651311636851, -2.178333586155809, -2.208989809459005, -2.236444846640478, -2.260521961652191, -2.281043352069214, -2.297830738834558, -2.310706013510904, -2.319491945476126, -2.324012951376878, -2.32409592899584, -2.319571157489732, -2.310273265712625, -2.296042270048221, -2.276724682831136, -2.2521746920361, -2.222255412450786, -2.186840208017746, -2.145814084428966, -2.099075150377959, -2.046536145114471, -1.988126029101303, -1.923791633637198, -1.85349936428048, -1.77723695178184, -1.695015243008773, -1.606870023016947, -1.512863857994425, -1.413087947273711, -1.307663970975908, -1.196745918124536, -1.080521878249175, -0.9592157775990137, -0.8330890391137414, -0.7024421432672815, -0.5676160648249436, -0.428993558456533, -0.2870002640506239, -0.1421056005065979, 0.005176585226156811, 0.1542876490006691, 0.3046240971214432, 0.4555378892709313, 0.60633713652517, 0.7562872352961277, 0.9046124786924533, 1.050498187000167, 1.193093398671758, 1.331514162291723, 1.464847468369129, 1.592155857398831, 1.712482737334532, 1.824858439327937, 1.928307035207131, 2.021853933592649, 2.10453426368316, 2.175402046491588, 2.233540142592398, 2.278070953179986, 2.308167837380577, 2.323067193271148, 2.322081132929654, 2.304610663094172, 2.270159262705165, 2.21834672685, 2.148923123578761, 
2.06178268593038, 1.956977436579568, 1.834730317142389, 1.695447568796727, 1.539730086005097, 1.368383441385657, 1.182426257873909, 0.9830965850630257, 0.7718559209150987, 0.5503905089064325, 0.3206095352059821, 0.08463985186731171, -0.155183138512606, -0.3963287823574636, -0.6360889750359312, -0.8716007357392715, -1.099873777128869, -1.317823220427588, -1.522307522150309, -1.710171578116409, -1.878294853329175, -2.023644252703639, -2.143331297915305, -2.234673010844582, -2.29525572589768, -2.323000864359895, -2.316231507216088, -2.273738402843607, -2.194843847957843, -2.079461690556606, -1.928151529846371, -1.742165038795376, -1.523482219606755, -1.274835331482666, -0.9997182146890431, -0.70237878667734, -0.3877926164776839, -0.0616157039513853, 0.269885088969239, 0.599925078577309, 0.9213188503096416, 1.226621316763894, 1.508291879540058, 1.758879940822884, 1.971229020855451, 2.138695667755207, 2.255378223249953, 2.31634936698083, 2.317885246877936, 2.257682966340633, 2.135057301406481, 1.951106830882072, 1.708839253206093, 1.413245612385637, 1.071313538249728, 0.6919704956665378, 0.2859494959862439, -0.1344282008183505, -0.5555561475395865, -0.9629309328725536, -1.341621009204848, -1.676807095498279, -1.954380266937191, -2.161578781776865, -2.287639670891325, -2.324436419415276, -2.267070043592049, -2.114377888790909, -1.869322948548586, -1.539226832138465, -1.135812065142497, -0.6750245084161627, -0.1766145407060163, 0.3365336574360721, 0.8393219662640767, 1.305588781105119, 1.709453902804619, 2.026803177325909, 2.236839881505574, 2.323613156928579, 2.277420334561974, 2.095971510823904, 1.785202981731526, 1.359632722233694, 0.8421672917969013, 0.2632961010158538, -0.3403541115171306, -0.928085411481558, -1.457742531531107, -1.888743770910752, -2.18534075057045, -2.319867181519198 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 2, + "radial_function": [ + 2.726781356990772e-08, 2.795810154794222e-08, 2.866586424887962e-08, 2.93915440474128e-08, 
3.013559451700233e-08, 3.08984807133741e-08, 3.16806794651945e-08, 3.248267967210372e-08, 3.33049826102937e-08, 3.414810224582219e-08, 3.501256555585794e-08, 3.589891285805852e-08, 3.680769814828632e-08, 3.773948944687384e-08, 3.869486915365494e-08, 3.967443441198325e-08, 4.067879748196614e-08, 4.170858612314721e-08, 4.276444398687591e-08, 4.384703101861098e-08, 4.495702387040708e-08, 4.60951163238444e-08, 4.72620197236645e-08, 4.845846342238363e-08, 4.968519523616188e-08, 5.094298191221181e-08, 5.223260960804064e-08, 5.355488438282381e-08, 5.491063270121781e-08, 5.630070194992763e-08, 5.772596096735012e-08, 5.918730058662635e-08, 6.068563419244078e-08, 6.222189829191606e-08, 6.379705309996044e-08, 6.541208313943233e-08, 6.706799785649909e-08, 6.876583225157315e-08, 7.050664752622052e-08, 7.229153174644613e-08, 7.412160052276934e-08, 7.599799770751677e-08, 7.792189610976643e-08, 7.989449822839094e-08, 8.191703700365842e-08, 8.399077658785856e-08, 8.6117013135439e-08, 8.829707561314318e-08, 9.053232663065676e-08, 9.282416329228286e-08, 9.517401807017595e-08, 9.758335969968335e-08, 1.000536940973514e-07, 1.025865653021711e-07, 1.051835564406528e-07, 1.078462907163288e-07, 1.10576432424309e-07, 1.133756879915172e-07, 1.162458070432624e-07, 1.191885834968111e-07, 1.222058566826411e-07, 1.252995124940823e-07, 1.2847148456606e-07, 1.317237554836769e-07, 1.350583580213935e-07, 1.384773764135746e-07, 1.419829476572026e-07, 1.45577262847568e-07, 1.492625685477729e-07, 1.530411681929048e-07, 1.569154235297549e-07, 1.608877560929848e-07, 1.649606487186619e-07, 1.691366470961096e-07, 1.734183613590447e-07, 1.778084677169906e-07, 1.823097101279938e-07, 1.86924902013683e-07, 1.916569280177456e-07, 1.965087458089213e-07, 2.014833879296361e-07, 2.065839636914362e-07, 2.118136611184056e-07, 2.171757489397787e-07, 2.226735786330002e-07, 2.283105865185003e-07, 2.340902959075022e-07, 2.400163193042018e-07, 2.460923606636931e-07, 2.523222177070564e-07, 2.587097842950494e-07, 
2.652590528618912e-07, 2.719741169106564e-07, 2.788591735718398e-07, 2.859185262266937e-07, 2.9315658719697e-07, 3.005778805027589e-07, 3.081870446901379e-07, 3.159888357304054e-07, 3.239881299927084e-07, 3.32189927291919e-07, 3.405993540136728e-07, 3.49221666318513e-07, 3.580622534271508e-07, 3.671266409888927e-07, 3.764204945353331e-07, 3.859496230214855e-07, 3.957199824565518e-07, 4.057376796266064e-07, 4.160089759115214e-07, 4.265402911985127e-07, 4.373382078947617e-07, 4.484094750416146e-07, 4.597610125329321e-07, 4.713999154402299e-07, 4.833334584473023e-07, 4.955691003971165e-07, 5.08114488953806e-07, 5.209774653826836e-07, 5.341660694512629e-07, 5.476885444543411e-07, 5.615533423663002e-07, 5.757691291238326e-07, 5.903447900424009e-07, 6.052894353698165e-07, 6.20612405980399e-07, 6.363232792132922e-07, 6.524318748585703e-07, 6.689482612948832e-07, 6.858827617824803e-07, 7.032459609155295e-07, 7.210487112377888e-07, 7.393021400257441e-07, 7.580176562434657e-07, 7.772069576735279e-07, 7.968820382284357e-07, 8.170551954471501e-07, 8.377390381813864e-07, 8.589464944764794e-07, 8.806908196517684e-07, 9.029856045855138e-07, 9.258447842095637e-07, 9.492826462190523e-07, 9.733138400025812e-07, 9.979533857984745e-07, 1.02321668408281e-06, 1.049119525195118e-06, 1.07567809920774e-06, 1.102909006045043e-06, 1.130829265858786e-06, 1.15945632966614e-06, 1.188808090257004e-06, 1.218902893377448e-06, 1.249759549196236e-06, 1.281397344061659e-06, 1.313836052555944e-06, 1.347095949854849e-06, 1.381197824400124e-06, 1.416162990892767e-06, 1.452013303615217e-06, 1.488771170090761e-06, 1.526459565088759e-06, 1.565102044984377e-06, 1.604722762481847e-06, 1.645346481710443e-06, 1.686998593702568e-06, 1.729705132263727e-06, 1.773492790244172e-06, 1.818388936222492e-06, 1.864421631611557e-06, 1.911619648197419e-06, 1.960012486122293e-06, 2.009630392322693e-06, 2.060504379434347e-06, 2.112666245175702e-06, 2.166148592222031e-06, 2.220984848582762e-06, 2.27720928849452e-06, 
2.334857053843142e-06, 2.393964176127965e-06, 2.454567598982068e-06, 2.516705201262687e-06, 2.580415820726065e-06, 2.645739278301646e-06, 2.712716402980778e-06, 2.781389057335363e-06, 2.851800163682618e-06, 2.923993730912052e-06, 2.998014881991603e-06, 3.073909882170092e-06, 3.151726167893493e-06, 3.231512376453336e-06, 3.313318376385476e-06, 3.397195298638484e-06, 3.483195568531006e-06, 3.571372938518012e-06, 3.661782521786632e-06, 3.754480826702271e-06, 3.849525792126783e-06, 3.946976823630693e-06, 4.046894830621952e-06, 4.149342264414769e-06, 4.254383157261882e-06, 4.362083162375033e-06, 4.472509594958461e-06, 4.585731474281002e-06, 4.701819566813374e-06, 4.82084643045722e-06, 4.942886459893835e-06, 5.06801593308086e-06, 5.196313058925793e-06, 5.327858026166513e-06, 5.462733053488905e-06, 5.601022440913237e-06, 5.742812622481304e-06, 5.888192220277092e-06, 6.037252099815123e-06, 6.190085426830562e-06, 6.346787725507044e-06, 6.507456938178368e-06, 6.67219348654127e-06, 6.841100334417941e-06, 7.014283052106911e-06, 7.191849882363036e-06, 7.373911808047584e-06, 7.560582621490522e-06, 7.75197899560885e-06, 7.948220556824814e-06, 8.149429959829958e-06, 8.35573296424172e-06, 8.567258513200078e-06, 8.784138813954037e-06, 9.006509420487547e-06, 9.234509318237023e-06, 9.468281010953292e-06, 9.707970609761841e-06, 9.953727924477796e-06, 1.020570655723178e-05, 1.046406399846582e-05, 1.072896172535907e-05, 1.10005653027445e-05, 1.127904448658042e-05, 1.156457333004045e-05, 1.185733029228905e-05, 1.215749835001019e-05, 1.24652651117588e-05, 1.278082293520664e-05, 1.310436904735562e-05, 1.343610566779388e-05, 1.377624013507141e-05, 1.412498503627427e-05, 1.448255833987847e-05, 1.484918353196613e-05, 1.522508975588946e-05, 1.561051195546945e-05, 1.60056910218189e-05, 1.641087394388147e-05, 1.68263139627806e-05, 1.725227073007507e-05, 1.768901047001973e-05, 1.813680614593278e-05, 1.859593763077371e-05, 1.90666918820381e-05, 1.954936312107893e-05, 2.004425301696603e-05, 
2.05516708749986e-05, 2.107193382998861e-05, 2.160536704443574e-05, 2.215230391171748e-05, 2.271308626442161e-05, 2.32880645879506e-05, 2.387759823953204e-05, 2.448205567277103e-05, 2.51018146678855e-05, 2.573726256776778e-05, 2.638879652001969e-05, 2.70568237251129e-05, 2.774176169082861e-05, 2.844403849313613e-05, 2.916409304367302e-05, 2.990237536399335e-05, 3.065934686675607e-05, 3.143548064402823e-05, 3.223126176288349e-05, 3.304718756848045e-05, 3.388376799480946e-05, 3.474152588330272e-05, 3.562099730950549e-05, 3.65227319180132e-05, 3.744729326588314e-05, 3.839525917473459e-05, 3.93672220917581e-05, 4.036378945985816e-05, 4.138558409716084e-05, 4.243324458612335e-05, 4.350742567248728e-05, 4.460879867432591e-05, 4.573805190143944e-05, 4.689589108536057e-05, 4.808303982023858e-05, 4.930024001487631e-05, 5.054825235620308e-05, 5.182785678447147e-05, 5.313985298047517e-05, 5.44850608650916e-05, 5.58643211114601e-05, 5.727849567011636e-05, 5.872846830740922e-05, 6.021514515753652e-05, 6.173945528854374e-05, 6.330235128263796e-05, 6.490480983118005e-05, 6.654783234472432e-05, 6.823244557848721e-05, 6.995970227363464e-05, 7.173068181478656e-05, 7.354649090415035e-05, 7.540826425270107e-05, 7.73171652888408e-05, 7.927438688497805e-05, 8.128115210247918e-05, 8.333871495545722e-05, 8.544836119387255e-05, 8.761140910643384e-05, 8.982921034379923e-05, 9.21031507625906e-05, 9.443465129074475e-05, 9.682516881474296e-05, 9.927619708926917e-05, 0.0001017892676698635, 0.0001043659508691528, 0.0001070078567372498, 0.0001097166360669361, 0.0001124939814242499, 0.0001153416282051211, 0.0001182613557187137, 0.0001212549882981425, 0.0001243243964392629, 0.0001274714979682366, 0.0001306982592386002, 0.0001340066963585825, 0.0001373988764494302, 0.0001408769189355271, 0.0001444429968671067, 0.0001480993382763803, 0.0001518482275679242, 0.0001556920069441852, 0.0001596330778669962, 0.0001636739025560045, 0.0001678170055249431, 0.0001720649751567028, 0.0001764204653181749, 
0.0001808861970158752, 0.0001854649600933693, 0.0001901596149715552, 0.000194973094432883, 0.0001999084054506132, 0.0002049686310642546, 0.0002101569323023368, 0.0002154765501537134, 0.0002209308075886162, 0.0002265231116307072, 0.0002322569554814196, 0.0002381359206978952, 0.0002441636794258692, 0.0002503439966888862, 0.0002566807327352556, 0.000263177845444206, 0.0002698393927927218, 0.0002766695353845857, 0.0002836725390431945, 0.0002908527774697393, 0.000298214734968405, 0.0003057630092402595, 0.000313502314247563, 0.0003214374831502639, 0.0003295734713164655, 0.0003379153594088357, 0.0003464683565486377, 0.0003552378035595252, 0.0003642291762930482, 0.0003734480890379184, 0.0003829002980151032, 0.000392591704960828, 0.000402528360799987, 0.0004127164694117296, 0.0004231623914900008, 0.0004338726485012616, 0.0004448539267415565, 0.0004561130814958384, 0.0004676571413018618, 0.0004794933123210279, 0.0004916289828197191, 0.0005040717277626203, 0.0005168293135217788, 0.0005299097027040954, 0.0005433210590999587, 0.000557071752756126, 0.000571170365176033, 0.0005856256946509029, 0.0006004467617243073, 0.0006156428147940315, 0.000631223335854579, 0.0006471980463837635, 0.0006635769133765755, 0.0006803701555309071, 0.0006975882495877685, 0.0007152419368307856, 0.0007333422297484346, 0.0007519004188630788, 0.000770928079731111, 0.0007904370801183273, 0.0008104395873547379, 0.0008309480758736888, 0.0008519753349391756, 0.0008735344765668457, 0.0008956389436423776, 0.000918302518243356, 0.000941539330168797, 0.000965363865681365, 0.0009897909764682174, 0.001014835888825428, 0.001040514213071195, 0.001066841953194205, 0.00109383551674206, 0.001121511724956386, 0.001149887823160278, 0.001178981491404293, 0.001208810855377204, 0.001239394497588584, 0.001270751468828915, 0.001302901299914789, 0.001335864013725885, 0.001369660137540213, 0.001404310715676124, 0.001439837322447761, 0.00147626207544091, 0.001513607649118691, 0.001551897288763773, 0.001591154824764783, 
0.001631404687257876, 0.001672671921128099, 0.001714982201382631, 0.001758361848903014, 0.001802837846586399, 0.001848437855883956, 0.001895190233746655, 0.001943124049989806, 0.001992269105082569, 0.002042655948375742, 0.002094315896777729, 0.002147281053888154, 0.002201584329599679, 0.002257259460183425, 0.002314341028861459, 0.002372864486886812, 0.002432866175134696, 0.002494383346224514, 0.002557454187178065, 0.002622117842632956, 0.002688414438617398, 0.002756385106907554, 0.002826072009971696, 0.002897518366523909, 0.002970768477692907, 0.003045867753829236, 0.003122862741954868, 0.003201801153881204, 0.003282731895001004, 0.003365705093775154, 0.003450772131930388, 0.003537985675379276, 0.003627399705884154, 0.003719069553479092, 0.003813051929665617, 0.003909404961403225, 0.004008188225908827, 0.004109462786285257, 0.004213291227995923, 0.004319737696203905, 0.00442886793399695, 0.004540749321513913, 0.004655450915995748, 0.004773043492778047, 0.004893599587247702, 0.005017193537782806, 0.005143901529697901, 0.005273801640213356, 0.005406973884474945, 0.005543500262640281, 0.005683464808058288, 0.005826953636562301, 0.005974054996901082, 0.006124859322327006, 0.006279459283372436, 0.006437949841829247, 0.006600428305962308, 0.006766994386977424, 0.006937750256770021, 0.0071128006069792, 0.007292252709370979, 0.007476216477579052, 0.007664804530224461, 0.007858132255443427, 0.008056317876848431, 0.008259482520947128, 0.008467750286046884, 0.008681248312671865, 0.008900106855516919, 0.00912445935696789, 0.009354442522210484, 0.009590196395962067, 0.009831864440842327, 0.01007959361742099, 0.01033353446595891, 0.01059384118987712, 0.01086067174097467, 0.01113418790642308, 0.01141455539756561, 0.01170194394054181, 0.01199652736876752, 0.0122984837172932, 0.01260799531906327, 0.01292524890310272, 0.01325043569465351, 0.01358375151728199, 0.01392539689698084, 0.01427557716828512, 0.01463450258242337, 0.0150023884175214, 0.01537945509087828, 0.01576592827332928, 
0.0161620390057132, 0.01656802381745498, 0.01698412484727829, 0.01741058996605832, 0.01784767290182123, 0.018295633366899, 0.01875473718724435, 0.01922525643390496, 0.01970746955666115, 0.02020166151981914, 0.02070812394015675, 0.02122715522701319, 0.02175906072450683, 0.0223041528558708, 0.02286275126987963, 0.02343518298935134, 0.02402178256168814, 0.02462289221143241, 0.02523886199479299, 0.02587004995610429, 0.02651682228616616, 0.0271795534824147, 0.02785862651086115, 0.02855443296973379, 0.02926737325474643, 0.02999785672591896, 0.0307463018758538, 0.0315131364993748, 0.03229879786442554, 0.03310373288410622, 0.03392839828973034, 0.03477326080476148, 0.03563879731948907, 0.0365254950662811, 0.03743385179524952, 0.03836437595014235, 0.03931758684426815, 0.04029401483624616, 0.04129420150535366, 0.04231869982623378, 0.04336807434270531, 0.0444429013404026, 0.04554376901795325, 0.04667127765638271, 0.04782603978641402, 0.04900868035331469, 0.05021983687891232, 0.05146015962038414, 0.05273031172540321, 0.05403096938318775, 0.05536282197098828, 0.05672657219550759, 0.05812293622872604, 0.05955264383757321, 0.06101643850685175, 0.0625150775547937, 0.06404933224058695, 0.06561998786317691, 0.067227843850614, 0.06887371383917346, 0.07055842574143475, 0.0722828218024662, 0.07404775864321883, 0.0758541072901791, 0.07770275319029399, 0.0795945962101222, 0.08153055061811815, 0.08351154504890726, 0.08553852244834093, 0.08761243999808148, 0.08973426901839024, 0.09190499484774509, 0.09412561669783878, 0.0963971474824531, 0.09872061361863041, 0.1010970547984957, 0.1035275237300156, 0.106013085844898, 0.1085548189717706, 0.1111538129726901, 0.1138111693409591, 0.1165280007581493, 0.1193054306081357, 0.1221445924458753, 0.1250466294185673, 0.1280126936367464, 0.1310439454927768, 0.1341415529241157, 0.1373066906186294, 0.1405405391591549, 0.1438442841043976, 0.1472191150031764, 0.1506662243389247, 0.1541868064012669, 0.1577820560814018, 0.1614531675879242, 0.1652013330796435, 
0.1690277412118543, 0.1729335755924446, 0.176920013144138, 0.1809882223690928, 0.1851393615120122, 0.1893745766178515, 0.193694999480145, 0.1981017454759334, 0.2025959112832145, 0.2071785724768212, 0.2118507809985951, 0.2166135624977141, 0.221467913537045, 0.2264147986613845, 0.2314551473235187, 0.2365898506640439, 0.241819758140988, 0.2471456740053624, 0.2525683536188646, 0.2580884996101365, 0.2637067578661042, 0.2694237133551636, 0.2752398857792002, 0.2811557250516775, 0.2871716065993796, 0.2932878264857051, 0.299504596353821, 0.3058220381884448, 0.3122401788954764, 0.3187589446993093, 0.3253781553582047, 0.3320975181988257, 0.3389166219717643, 0.3458349305306757, 0.3528517763385745, 0.3599663538057639, 0.3671777124649567, 0.3744847499902832, 0.3818862050680923, 0.3893806501288308, 0.3969664839506734, 0.4046419241471515, 0.4124049995526855, 0.4202535425216595, 0.4281851811586246, 0.4361973314991714, 0.4442871896631909, 0.4524517240045123, 0.4606876672832636, 0.4689915088899065, 0.4773594871524914, 0.4857875817615296, 0.4942715063498133, 0.5028067012675388, 0.5113883265963519, 0.520011255449187, 0.5286700676062466, 0.5373590435410142, 0.5460721588937909, 0.5548030794540388, 0.563545156716544, 0.5722914240802992, 0.5810345937628819, 0.5897670545069444, 0.5984808701593519, 0.6071677792072352, 0.6158191953589821, 0.6244262092617416, 0.6329795914503769, 0.641469796625999, 0.64988696936501, 0.6582209513621093, 0.6664612903127751, 0.6745972505422373, 0.6826178254889655, 0.6905117521508833, 0.6982675276020203, 0.7058734276858536, 0.7133175279890898, 0.7205877271960514, 0.727671772918868, 0.7345572900923522, 0.7412318120144916, 0.7476828141037749, 0.7538977504330083, 0.7598640930855375, 0.7655693743638423, 0.7710012318620557, 0.7761474563928472, 0.7809960427352615, 0.7855352431431267, 0.7897536235235532, 0.7936401221615289, 0.7971841108295502, 0.800375458080509, 0.8032045944774601, 0.8056625794654096, 0.8077411695377631, 0.8094328872935206, 0.8107310909207638, 
0.8116300435774357, 0.8121249820720794, 0.8122121841752228, 0.8118890338167858, 0.8111540833466413, 0.810007111954755, 0.8084491792648092, 0.8064826730316139, 0.804111349788881, 0.8013403672110918, 0.7981763068725777, 0.7946271860099502, 0.7907024568223436, 0.7864129917795427, 0.781771053352982, 0.7767902465413447, 0.7714854525336242, 0.7658727418409903, 0.75996926523787, 0.7537931208856373, 0.7473631960731102, 0.7406989821004288, 0.7338203609611879, 0.7267473626461792, 0.7194998921052599, 0.7120974251663796, 0.7045586730271285, 0.696901215308927, 0.6891411021014773, 0.6812924259292933, 0.6733668651467145, 0.6653732009155932, 0.657316810643063, 0.6491991415565401, 0.641018953510986, 0.6327789075021016, 0.6244830310050865, 0.6161353495775791, 0.6077398841887728, 0.5993006486176935, 0.5908216469195626, 0.582306870958867, 0.5737602980074739, 0.5651858884058808, 0.5565875832854572, 0.5479693023493346, 0.5393349417094254, 0.5306883717769116, 0.5220334352033974, 0.5133739448699016, 0.5047136819207013, 0.4960563938390591, 0.4874057925618837, 0.4787655526303648, 0.4701393093736753, 0.4615306571229124, 0.4529431474525533, 0.4443802874468246, 0.4358455379885373, 0.4273423120681097, 0.4188739731106954, 0.4104438333195456, 0.4020551520339687, 0.3937111341004932, 0.3854149282561085, 0.3771696255227293, 0.3689782576123173, 0.3608437953423921, 0.3527691470619621, 0.3447571570882195, 0.3368106041546547, 0.328932199871555, 0.3211245872001697, 0.3133903389421338, 0.3057319562460429, 0.2981518671333769, 0.2906524250462542, 0.2832359074197814, 0.2759045142820293, 0.2686603668849155, 0.2615055063695163, 0.2544418924695422, 0.2474714022569169, 0.2405958289335735, 0.2338168806737427, 0.2271361795211351, 0.2205552603455334, 0.2140755698633903, 0.2076984657270821, 0.2014252156875018, 0.1952569968346701, 0.1891948949210195, 0.1832399037719458, 0.1773929247881395, 0.1716547665440901, 0.1660261444870147, 0.1605076807402894, 0.1550999040152579, 0.1498032496350661, 0.1446180596739124, 
0.1395445832148214, 0.1345829767287432, 0.1297333045774466, 0.1249955396423217, 0.1203695640808332, 0.1158551702119701, 0.1114520615316269, 0.1071598538584237, 0.1029780766100314, 0.0989061742096165, 0.09494350762155594, 0.09108935601510865, 0.0873429185542518, 0.08370331631141872, 0.08016959430239903, 0.07674072363919034, 0.07341560379712496, 0.07019306499213758, 0.06707187066359176, 0.06405072005765075, 0.06112825090575941, 0.05830304219240388, 0.05557361700593776, 0.0529384454659059, 0.05039594771996667, 0.04794449700320882, 0.04558242275238423, 0.04330801376733296, 0.04111952141166525, 0.03901516284458646, 0.03699312427560749, 0.03505156423377555, 0.0331886168429896, 0.03140239509493108, 0.0296909941111449, 0.02805249438584778, 0.02648496500112089, 0.02498646680626109, 0.02355505555321904, 0.022188784980243, 0.02088570983607236, 0.01964388883728403, 0.0184613875516867, 0.01733628120098017, 0.01626665737624836, 0.01525061866023289, 0.01428628515073724, 0.01337179687993666, 0.01250531612481499, 0.01168502960441176, 0.01090915056004121, 0.01017592071513392, 0.00948361211185101, 0.008830528822125754, 0.008215008531296497, 0.007635423993003847, 0.00709018435453272, 0.006577736352281991, 0.006096565377539391, 0.005645196413223791, 0.005222194842728818, 0.004826167132458407, 0.004455761390084077, 0.004109667800973463, 0.003786618945637768, 0.003485390001420781, 0.003204798832002091, 0.002943705968610941, 0.002701014487143394, 0.002475669785643223, 0.002266659266845384, 0.002073011930689369, 0.001893797881887918, 0.001728127757784104, 0.00157515208184684, 0.001434060548241432, 0.001304081242968323, 0.001184479807090149, 0.001074558547565256, 0.0009736555011757919, 0.0008811434569812749, 0.000796428942644411, 0.0007189511798731625, 0.0006481810140630785, 0.0005836198231552807, 0.0005247984103881948, 0.0004712758855765303, 0.0004226385392997236, 0.0003784987140953755, 0.0003384936765445301, 0.000302284493841929, 0.0002695549180185805, 0.0002400102898341555, 
0.0002133763541270613, 0.0001893988146617844, 0.0001678399693412417, 0.0001484827956101911, 0.0001311267738848909, 0.000115584518459277, 0.0001016824522711628, 8.926052781164656e-05, 7.817128375805755e-05, 6.827892540001164e-05, 5.945842671328974e-05, 5.159465142728564e-05, 4.458148883207336e-05, 3.832099603659937e-05, 3.27226211443472e-05, 2.77018970004653e-05, 2.319661794721779e-05, 1.905319716645324e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2P", + "angular_momentum": 1 + }, + { + "index": 3, + "radial_function": [ + 1.856930810322221e-08, 1.903939236900964e-08, 1.952137687428831e-08, 2.001556287503703e-08, 2.052225925356518e-08, 2.1041782711574e-08, 2.157445796810542e-08, 2.212061796250202e-08, 2.268060406250474e-08, 2.325476627761897e-08, 2.384346347788157e-08, 2.444706361816633e-08, 2.50659439681676e-08, 2.570049134820607e-08, 2.635110237100401e-08, 2.701818368958086e-08, 2.770215225142457e-08, 2.840343555909726e-08, 2.912247193743797e-08, 2.985971080753004e-08, 3.061561296760356e-08, 3.139065088104916e-08, 3.218530897172285e-08, 3.300008392672653e-08, 3.383548500685355e-08, 3.469203436489278e-08, 3.557026737199118e-08, 3.647073295227789e-08, 3.739399392595934e-08, 
3.834062736110027e-08, 3.931122493430945e-08, 4.03063933005568e-08, 4.13267544723521e-08, 4.237294620852276e-08, 4.344562241283386e-08, 4.454545354269846e-08, 4.567312702823535e-08, 4.682934770193472e-08, 4.801483823920109e-08, 4.92303396100488e-08, 5.047661154223154e-08, 5.175443299609686e-08, 5.306460265146108e-08, 5.44079394068098e-08, 5.578528289113586e-08, 5.719749398873371e-08, 5.864545537727997e-08, 6.013007207953506e-08, 6.165227202901122e-08, 6.321300664996077e-08, 6.481325145204613e-08, 6.645400664006474e-08, 6.813629773910876e-08, 6.986117623555075e-08, 7.162972023425661e-08, 7.344303513243454e-08, 7.530225431054388e-08, 7.720853984069379e-08, 7.916308321297488e-08, 8.116710608017894e-08, 8.322186102136966e-08, 8.53286323247848e-08, 8.748873679055673e-08, 8.970352455375392e-08, 9.19743799282586e-08, 9.430272227200575e-08, 9.669000687412696e-08, 9.9137725864552e-08, 1.016474091466365e-07, 1.042206253534005e-07, 1.068589828279725e-07, 1.095641306288543e-07, 1.123377595606351e-07, 1.151816032307962e-07, 1.180974391332713e-07, 1.210870897594341e-07, 1.241524237372131e-07, 1.272953569990426e-07, 1.305178539793793e-07, 1.338219288425362e-07, 1.372096467415962e-07, 1.406831251091981e-07, 1.442445349809971e-07, 1.478961023526293e-07, 1.516401095710282e-07, 1.554788967609613e-07, 1.594148632876804e-07, 1.634504692565991e-07, 1.675882370509337e-07, 1.71830752908271e-07, 1.76180668537045e-07, 1.806407027739363e-07, 1.852136432832281e-07, 1.899023482991808e-07, 1.94709748412517e-07, 1.996388484021276e-07, 2.046927291131511e-07, 2.098745493825946e-07, 2.151875480137023e-07, 2.206350458003063e-07, 2.262204476024208e-07, 2.319472444743834e-07, 2.378190158468676e-07, 2.438394317641342e-07, 2.500122551779206e-07, 2.563413442993939e-07, 2.628306550106502e-07, 2.694842433372569e-07, 2.763062679833887e-07, 2.833009929311417e-07, 2.904727901056453e-07, 2.978261421076444e-07, 3.053656450152568e-07, 3.130960112566543e-07, 3.210220725554701e-07, 3.291487829507625e-07, 
3.374812218934347e-07, 3.460245974210372e-07, 3.547842494129408e-07, 3.637656529279147e-07, 3.729744216261925e-07, 3.824163112781709e-07, 3.920972233619276e-07, 4.020232087518123e-07, 4.122004715004147e-07, 4.22635372716267e-07, 4.333344345397165e-07, 4.443043442194436e-07, 4.555519582921759e-07, 4.670843068682154e-07, 4.789085980254445e-07, 4.910322223145748e-07, 5.034627573784391e-07, 5.162079726882235e-07, 5.292758343995974e-07, 5.426745103317681e-07, 5.564123750725866e-07, 5.704980152128877e-07, 5.849402347133303e-07, 5.997480604071078e-07, 6.149307476419449e-07, 6.304977860649336e-07, 6.464589055538022e-07, 6.628240822983355e-07, 6.796035450357476e-07, 6.968077814438933e-07, 7.144475446963301e-07, 7.325338601833144e-07, 7.510780324029393e-07, 7.70091652026724e-07, 7.895866031440528e-07, 8.095750706900195e-07, 8.300695480612967e-07, 8.51082844924798e-07, 8.726280952240228e-07, 8.947187653880591e-07, 9.173686627484143e-07, 9.405919441689036e-07, 9.644031248939996e-07, 9.888170876211847e-07, 1.013849091802945e-06, 1.039514783184264e-06, 1.065830203581533e-06, 1.092811800909021e-06, 1.120476439459165e-06, 1.148841410443068e-06, 1.177924442797876e-06, 1.207743714267688e-06, 1.238317862764996e-06, 1.269665998019745e-06, 1.301807713523237e-06, 1.334763098774449e-06, 1.368552751836301e-06, 1.403197792209811e-06, 1.438719874034148e-06, 1.475141199620801e-06, 1.512484533330418e-06, 1.550773215800858e-06, 1.590031178535454e-06, 1.630282958860579e-06, 1.671553715261782e-06, 1.713869243108247e-06, 1.757255990775202e-06, 1.801741076174513e-06, 1.847352303703732e-06, 1.894118181624137e-06, 1.942067939878772e-06, 1.991231548361444e-06, 2.041639735648224e-06, 2.093324008203132e-06, 2.146316670069921e-06, 2.200650843062437e-06, 2.256360487465974e-06, 2.313480423262726e-06, 2.37204635189452e-06, 2.432094878576396e-06, 2.493663535175123e-06, 2.556790803666744e-06, 2.621516140187988e-06, 2.687879999696504e-06, 2.755923861255265e-06, 2.825690253957123e-06, 2.897222783505482e-06, 
2.970566159467901e-06, 3.045766223219561e-06, 3.122869976594008e-06, 3.201925611259267e-06, 3.282982538837421e-06, 3.366091421786701e-06, 3.451304205065295e-06, 3.538674148596566e-06, 3.628255860556233e-06, 3.720105331501977e-06, 3.814279969367064e-06, 3.910838635339762e-06, 4.009841680650857e-06, 4.111350984292538e-06, 4.215429991691865e-06, 4.322143754363285e-06, 4.431558970564822e-06, 4.543744026983301e-06, 4.658769041474891e-06, 4.776705906887317e-06, 4.89762833599144e-06, 5.021611907550147e-06, 5.148734113553208e-06, 5.279074407647978e-06, 5.412714254795733e-06, 5.549737182185045e-06, 5.690228831433862e-06, 5.834277012112779e-06, 5.9819717566233e-06, 6.133405376464908e-06, 6.288672519926477e-06, 6.447870231237966e-06, 6.6110980112191e-06, 6.778457879463472e-06, 6.950054438096253e-06, 7.125994937145889e-06, 7.306389341570373e-06, 7.491350399979844e-06, 7.680993715098906e-06, 7.875437816012043e-06, 8.074804232237805e-06, 8.27921756967781e-06, 8.48880558848793e-06, 8.703699282920512e-06, 8.924032963187308e-06, 9.149944339394368e-06, 9.381574607601337e-06, 9.619068538058764e-06, 9.862574565678765e-06, 1.01122448827953e-05, 1.036823553427221e-05, 1.06307065150183e-05, 1.089982186997043e-05, 1.117574979660698e-05, 1.145866275005577e-05, 1.17487375508622e-05, 1.20461554954846e-05, 1.23511024695861e-05, 1.26637690641937e-05, 1.298435069479682e-05, 1.33130477234602e-05, 1.365006558402692e-05, 1.399561491048999e-05, 1.434991166861259e-05, 1.471317729087917e-05, 1.508563881486164e-05, 1.546752902508709e-05, 1.58590865984955e-05, 1.626055625357853e-05, 1.667218890329205e-05, 1.709424181183834e-05, 1.752697875541563e-05, 1.797067018703517e-05, 1.842559340550909e-05, 1.889203272871418e-05, 1.937027967123992e-05, 1.986063312653177e-05, 2.036339955364299e-05, 2.087889316871209e-05, 2.14074361412849e-05, 2.194935879560423e-05, 2.250499981699251e-05, 2.3074706463456e-05, 2.365883478264328e-05, 2.425774983429249e-05, 2.487182591830697e-05, 2.550144680860119e-05, 
2.614700599286261e-05, 2.680890691837988e-05, 2.748756324408982e-05, 2.818339909900093e-05, 2.889684934715482e-05, 2.962835985929001e-05, 3.037838779137869e-05, 3.11474018702091e-05, 3.193588268619233e-05, 3.274432299357597e-05, 3.357322801825163e-05, 3.442311577334851e-05, 3.529451738280969e-05, 3.618797741315293e-05, 3.710405421362292e-05, 3.804332026494656e-05, 3.900636253690956e-05, 3.999378285497619e-05, 4.100619827618165e-05, 4.204424147453077e-05, 4.310856113614315e-05, 4.419982236439165e-05, 4.531870709528572e-05, 4.646591452335915e-05, 4.764216153832746e-05, 4.884818317278624e-05, 5.008473306123056e-05, 5.135258391068e-05, 5.265252798320349e-05, 5.398537759064395e-05, 5.535196560185056e-05, 5.675314596273532e-05, 5.818979422947644e-05, 5.966280811520163e-05, 6.117310805049092e-05, 6.272163775804842e-05, 6.430936484189916e-05, 6.593728139147994e-05, 6.76064046009981e-05, 6.931777740444426e-05, 7.107246912665458e-05, 7.287157615082555e-05, 7.4716222602899e-05, 7.6607561053241e-05, 7.854677323605115e-05, 8.053507078695094e-05, 8.257369599920651e-05, 8.466392259905945e-05, 8.680705654064459e-05, 8.900443682099003e-05, 9.125743631560562e-05, 9.356746263517704e-05, 9.593595900390082e-05, 9.836440516000221e-05, 0.000100854318278997, 0.0001034072539202702, 0.0001060248069975568, 0.00010870861277393, 0.000111460347881912, 0.0001142817313693393, 0.0001171745257716345, 0.0001201405382111444, 0.0001231816215242297, 0.0001262996754168036, 0.0001294966476490338, 0.0001327745352499447, 0.0001361353857626675, 0.0001395812985211139, 0.0001431144259588604, 0.0001467369749510541, 0.0001504512081901713, 0.0001542594455964767, 0.0001581640657640585, 0.0001621675074433322, 0.0001662722710609278, 0.0001704809202779033, 0.0001747960835872396, 0.0001792204559516118, 0.0001837568004824415, 0.0001884079501612617, 0.0001931768096044994, 0.0001980663568726415, 0.0002030796453251203, 0.0002082198055217423, 0.0002134900471722404, 0.0002188936611346331, 0.0002244340214643664, 
0.0002301145875144294, 0.0002359389060888899, 0.0002419106136503774, 0.0002480334385832338, 0.0002543112035134077, 0.0002607478276870118, 0.0002673473294085754, 0.0002741138285405327, 0.0002810515490657664, 0.00028816482171443, 0.0002954580866569406, 0.0003029358962645357, 0.0003106029179392508, 0.000318463937015044, 0.0003265238597318811, 0.0003347877162842179, 0.0003432606639463883, 0.0003519479902763435, 0.0003608551163996041, 0.0003699876003760327, 0.0003793511406506689, 0.0003889515795914102, 0.0003987949071152967, 0.0004088872644057331, 0.0004192349477229728, 0.0004298444123098735, 0.0004407222763958353, 0.0004518753253006693, 0.0004633105156416231, 0.0004750349796453199, 0.0004870560295678979, 0.000499381162225636, 0.0005120180636389236, 0.0005249746137923115, 0.0005382588915135432, 0.0005518791794745509, 0.0005658439693174043, 0.000580161966908079, 0.0005948420977215281, 0.0006098935123613797, 0.0006253255922164065, 0.0006411479552593191, 0.0006573704619891481, 0.0006740032215216661, 0.0006910565978324975, 0.0007085412161541831, 0.0007264679695337139, 0.000744848025552255, 0.0007636928332125476, 0.0007830141299970254, 0.0008028239491015332, 0.0008231346268483038, 0.0008439588102833189, 0.0008653094649614117, 0.0008871998829253972, 0.0009096436908812339, 0.0009326548585775012, 0.0009562477073904725, 0.000980436919123185, 0.001005237545019822, 0.001030665015005197, 0.001056735147150378, 0.001083464157372649, 0.001110868669374319, 0.001138965724826756, 0.001167772793804328, 0.001197307785476035, 0.001227589059058555, 0.001258635435039533, 0.001290466206676028, 0.00132310115177405, 0.001356560544757497, 0.001390865169032756, 0.00142603632965473, 0.001462095866302838, 0.001499066166574027, 0.001536970179599606, 0.001575831429993955, 0.001615674032142589, 0.001656522704837646, 0.001698402786268979, 0.001741340249378669, 0.001785361717588061, 0.001830494480904502, 0.001876766512418564, 0.001924206485199124, 0.001972843789595254, 0.002022708550956019, 
0.002073831647775149, 0.002126244730272422, 0.002179980239420296, 0.002235071426426012, 0.00229155237267975, 0.00234945801017912, 0.002408824142440481, 0.002469687465907141, 0.002532085591867476, 0.002596057068891854, 0.002661641405800647, 0.00272887909517593, 0.002797811637426259, 0.002868481565418941, 0.002940932469691095, 0.003015209024250377, 0.003091357012980502, 0.003169423356662779, 0.003249456140626085, 0.003331504643040736, 0.003415619363866482, 0.003501852054470529, 0.003590255747928671, 0.003680884790023028, 0.003773794870950399, 0.003869043057756432, 0.003966687827508746, 0.004066789101224678, 0.004169408278568047, 0.004274608273329299, 0.004382453549704974, 0.004493010159390798, 0.004606345779505036, 0.00472252975135573, 0.004841633120069106, 0.00496372867509374, 0.005088890991596403, 0.005217196472766254, 0.005348723393041646, 0.005483551942277199, 0.005621764270865789, 0.005763444535832172, 0.005908678947914447, 0.006057555819648068, 0.006210165614469398, 0.006366600996854349, 0.006526956883508204, 0.006691330495621059, 0.006859821412205459, 0.007032531624531167, 0.007209565591672027, 0.007391030297179552, 0.007577035306898217, 0.007767692827935518, 0.007963117768803216, 0.008163427800738825, 0.008368743420225117, 0.008579188012715788, 0.008794887917582483, 0.009015972494292153, 0.009242574189825603, 0.009474828607348874, 0.009712874576143329, 0.009956854222804899, 0.01020691304371883, 0.01046319997881529, 0.01072586748661226, 0.01099507162054732, 0.01127097210660284, 0.01155373242222412, 0.0118435198765297, 0.01214050569181302, 0.01244486508632982, 0.01275677735836692, 0.01307642597158242, 0.01340399864161047, 0.01373968742391427, 0.014083688802876, 0.01443620378210455, 0.01479743797594085, 0.01516760170213807, 0.01554691007569105, 0.01593558310378296, 0.01633384578181921, 0.01674192819050918, 0.0171600655939552, 0.01758849853870526, 0.01802747295371714, 0.0184772402511812, 0.01893805742814232, 0.01941018716885469, 0.01989389794780138, 
0.02038946413329953, 0.02089716609161055, 0.02141729029146518, 0.02195012940890657, 0.02249598243234893, 0.02305515476773919, 0.02362795834370168, 0.02421471171653919, 0.02481574017495076, 0.02543137584432102, 0.02606195779042245, 0.02670783212236487, 0.0273693520946105, 0.02804687820786767, 0.02874077830865666, 0.02945142768733591, 0.03017920917435551, 0.03092451323449813, 0.03168773805884711, 0.03246928965420844, 0.033269581929697, 0.03408903678017943, 0.03492808416624864, 0.03578716219038677, 0.03666671716895294, 0.03756720369961153, 0.03848908472379725, 0.03943283158378902, 0.04039892407394066, 0.04138785048559725, 0.04240010764519248, 0.04343620094500349, 0.04449664436600887, 0.04558196049226591, 0.04669268051619599, 0.04782934423413467, 0.04899250003147088, 0.05018270485666643, 0.05140052418341308, 0.05264653196014676, 0.05392131054610398, 0.05522545063306303, 0.05655955115187844, 0.05792421916286773, 0.05932006972907708, 0.06074772577139696, 0.06220781790446452, 0.06370098425223407, 0.06522787024205377, 0.06678912837603579, 0.06838541797845356, 0.0700174049178531, 0.07168576130250681, 0.07339116514778432, 0.07513430001396207, 0.07691585461293035, 0.0787365223822043, 0.08059700102458184, 0.08249799201173244, 0.08444020004994018, 0.08642433250616133, 0.08845109879249677, 0.09052120970711391, 0.09263537672959182, 0.09479431126859922, 0.09699872385975342, 0.09924932331144413, 0.1015468157963464, 0.1038919038862836, 0.106285285528047, 0.1087276529577119, 0.1112196915509469, 0.1137620786067471, 0.1163554820619828, 0.1190005591341004, 0.1216979548892736, 0.1244483007332663, 0.1272522128222315, 0.1301102903906471, 0.1330231139935695, 0.1359912436603692, 0.1390152169571172, 0.1420955469547862, 0.1452327201004519, 0.1484271939887095, 0.1516793950305494, 0.1549897160170064, 0.1583585135749464, 0.1617861055124526, 0.1652727680513774, 0.1688187329447328, 0.1724241844767607, 0.1760892563436643, 0.1798140284131935, 0.1835985233614903, 0.1874427031858329, 
0.1913464655922215, 0.1953096402570289, 0.1993319849623061, 0.2034131816047133, 0.207552832078451, 0.2117504540330656, 0.2160054765074762, 0.2203172354421566, 0.2246849690720084, 0.2291078132031086, 0.2335847963772607, 0.2381148349290256, 0.2426967279407715, 0.2473291521021759, 0.2520106564815737, 0.2567396572176084, 0.2615144321407274, 0.26633311533527, 0.2711936916541565, 0.2760939911995101, 0.2810316837839966, 0.2860042733891373, 0.2910090926384538, 0.2960432973049772, 0.3011038608743724, 0.3061875691868037, 0.311291015182536, 0.3164105937782819, 0.3215424969033616, 0.3266827087268576, 0.3318270011091728, 0.3369709293136188, 0.3421098280159879, 0.3472388076523997, 0.3523527511480592, 0.3574463110719799, 0.3625139072650711, 0.3675497249913663, 0.3725477136644905, 0.3775015862036978, 0.3824048190760102, 0.3872506530829998, 0.3920320949526809, 0.3967419197986822, 0.4013726745103425, 0.4059166821386192, 0.4103660473435712, 0.4147126629697421, 0.4189482178158784, 0.4230642056650462, 0.4270519356403309, 0.4309025439497585, 0.4346070070819046, 0.4381561565106964, 0.4415406949641016, 0.4447512143067155, 0.4477782150805032, 0.4506121277411552, 0.4532433356195112, 0.4556621996282033, 0.4578590847230459, 0.4598243881165636, 0.4615485692273967, 0.463022181334043, 0.4642359048843765, 0.4651805823936268, 0.4658472548428887, 0.4662271994677468, 0.466311968802215, 0.466093430816893, 0.4655638099620527, 0.4647157288963412, 0.4635422506499891, 0.4620369209379809, 0.4601938103037269, 0.458007555737597, 0.4554734013774996, 0.4525872378608523, 0.4493456398591823, 0.4457459012887234, 0.4417860676532488, 0.4374649649397238, 0.4327822244538403, 0.427738302952023, 0.4223344974000172, 0.4165729536667135, 0.4104566684466929, 0.4039894836973349, 0.3971760728776536, 0.3900219182879222, 0.3825332788331289, 0.3747171475713063, 0.3665811984614689, 0.3581337217973032, 0.3493835479038377, 0.340339958786964, 0.331012587562023, 0.3214113056494534, 0.3115460979146121, 0.3014269261469954, 
0.2910635815224177, 0.2804655269715629, 0.2696417306901078, 0.2586004923696034, 0.2473492641039923, 0.2358944683325861, 0.2242413156146539, 0.2123936254899639, 0.2003536541594745, 0.1881219332151132, 0.1756976636109644, 0.1630808968255594, 0.1502720590140906, 0.1372715781798541, 0.1240798842335264, 0.1106974091587776, 0.09712458728191832, 0.08336185564284852, 0.06940965446414925, 0.05526842771475085, 0.04093862376421444, 0.02642069612328368, 0.01171510426600044, -0.003177685471666693, -0.01825719892205816, -0.03352295305060537, -0.04897445486112494, -0.0646112002478502, -0.08043267280066242, -0.09643834257019926, -0.112627664799703, -0.1290000786306327, -0.1455550057891977, -0.1622918492610786, -0.1792099919616769, -0.1963087954092871, -0.2135875984086027, -0.2310457157519598, -0.2486824369456806, -0.2664970249688105, -0.2844887150714383, -0.3026567136196605, -0.3210001969940831, -0.3395183105485606, -0.3582101676356449, -0.3770748487049572, -0.3961114004804047, -0.4153188352218415, -0.4346961300764191, -0.4542422265244825, -0.4739560299244546, -0.4938364091606939, -0.5138821963978383, -0.534092186944626, -0.5544651392296489, -0.5749997748909129, -0.5956947789804807, -0.6165488002848325, -0.6375604517609208, -0.6587283110872008, -0.6800509213281953, -0.7015267917104065, -0.7231543985066086, -0.7449321860247525, -0.7668585676968895, -0.7889319272626627, -0.8111506200410395, -0.8335129742830647, -0.8560172925974819, -0.8786618534401414, -0.9014449126571431, -0.9243647050706867, -0.9474194460956106, -0.9706073333735834, -0.9939265484108967, -1.017375258204767, -1.040951616842014, -1.064653767052926, -1.088479841702077, -1.112427965196777, -1.136496254792793, -1.160682821775922, -1.184985772496903, -1.209403209236145, -1.233933230873679, -1.258573933338722, -1.2833234098122, -1.308179750654618, -1.333141043030631, -1.358205370200765, -1.383370810449766, -1.40863543562018, -1.433997309218894, -1.459454484063533, -1.485004999434832, -1.510646877700368, 
-1.536378120374315, -1.562196703577298, -1.588100572859793, -1.614087637352057, -1.640155763203081, -1.666302766270726, -1.692526404024884, -1.718824366625327, -1.74519426713575, -1.771633630835552, -1.798139883590932, -1.824710339247097, -1.851342186003703, -1.878032471736065, -1.904778088225253, -1.93157575426091, -1.958421997581482, -1.985313135617585, -2.01224525500539, -2.039214189838331, -2.066215498626948, -2.093244439938457, -2.120295946689617, -2.147364599068652, -2.174444596064395, -2.201529725583553, -2.228613333139888, -2.255688289102368, -2.282746954492841, -2.309781145327649, -2.336782095501705, -2.363740418218131, -2.390646065971358, -2.417488289096891, -2.44425559290658, -2.470935693434314, -2.4975154718236, -2.52398092739549, -2.550317129442788, -2.576508167804507, -2.602537102283094, -2.628385910976061, -2.654035437603383, -2.679465337922398, -2.704654025332899, -2.729578615786863, -2.75421487212961, -2.778537148012369, -2.802518331530151, -2.826129788753571, -2.849341307338827, -2.872121040416536, -2.89443545097747, -2.916249256991514, -2.937525377515524, -2.958224880065928, -2.978306929553325, -2.997728739098628, -3.016445523073787, -3.034410452734657, -3.0515746148393, -3.06788697367179, -3.083294336919637, -3.097741325882067, -3.111170350535814, -3.123521590000843, -3.134732978832982, -3.144740199865809, -3.153476684499467, -3.160873620622505, -3.166859969215182, -3.171362490291792, -3.174305779003541, -3.175612312553783, -3.175202508920828, -3.172994798705099, -3.168918023311174, -3.162938281176024, -3.15500657109929, -3.145002864438455, -3.132842785485344, -3.118429335340455, -3.101666248349336, -3.082456357814898, -3.060701961311042, -3.036305073479516, -3.009167697814931, -2.979192124388277, -2.946281253861187, -2.910338949101777, -2.871270415589594, -2.828982611776675, -2.783384690531741, -2.734388472741461, -2.681908954074816, -2.625864845832506, -2.566179150701204, -2.502779774110725, -2.4356001717483, -2.364580033616801, 
-2.289666004831969, -2.210812443132069, -2.12798221282362, -2.041147514604038, -1.95029075038525, -1.855405421888773, -1.756497061390433, -1.653584192559599, -1.54669931886129, -1.435889936468064, -1.321219568060155, -1.202768813275518, -1.080636410904836, -0.9549403072092841, -0.8258187239704062, -0.6934312190617781, -0.5579597314619266, -0.4196096017084402, -0.2786105578265605, -0.1352176557548253, 0.01028783776029933, 0.1575976329140496, 0.3063756894635103, 0.456257589577102, 0.6068500274144233, 0.7577304330774656, 0.9084467496981679, 1.058517383519142, 1.207431347865658, 1.354648622876638, 1.499600753736668, 1.641691710903207, 1.78029903642361, 1.914775300852398, 2.044449895474375, 2.168631184474494, 2.286609041328183, 2.397657792970659, 2.501039594191887, 2.596008253143969, 2.68181352678618, 2.757705902474076, 2.822941878666082, 2.876789753816583, 2.918535927891774, 2.947491715528073, 2.963000663600673, 2.964446358833789, 2.951260703023278, 2.92293262442369, 2.879017183853577, 2.819145023086885, 2.743032091132802, 2.650489571089599, 2.541433916441499, 2.415896891029956, 2.274035491581714, 2.116141615760827, 1.942651322414728, 1.754153514233988, 1.551397856717371, 1.335301731456274, 1.106956006708196, 0.8676293944586789, 0.6187711511757108, 0.3620118698015674, 0.09916210382607393, -0.1677914387780811, -0.4366923926700977, -0.7052241777607213, -0.9709226293683176, -1.231192038714177, -1.483324799680197, -1.724524822390834, -1.951934829548721, -2.162667596831441, -2.353841133435478, -2.522617722570914, -2.666246654101369, -2.782110382569821, -2.86777373383744, -2.921035663138547, -2.9399829375956, -2.92304497870929, -2.869048957158816, -2.777274086147067, -2.647503913899843, -2.48007527487967, -2.275922427638407, -2.036614790585217, -1.764386591585537, -1.462156680202465, -1.133536720097587, -0.7828259916266581, -0.4149910992648606, -0.03562900346795703, 0.3490880100707895, 0.7324865417150311, 1.107486236579308, 1.466714219696053, 1.802639609771998, 
2.107727297787681, 2.374609475642965, 2.596272621644978, 2.766256806482718, 2.878863293437918, 2.929365493864828, 2.914217433649195, 2.831253025118174, 2.679868664783432, 2.461181039361324, 2.178151574807956, 1.835668763446483, 1.440579712292028, 1.001662730358629, 0.5295336691978805, 0.03648009669191317, -0.463780745271422, -0.9564218545412962, -1.425902995129514, -1.856439174610261, -2.232532582634067, -2.539555348528342, -2.76436614197829, -2.895939310325102, -2.925981147459201, -2.849504291156118, -2.665328450360983, -2.376473997216442, -1.990414755047722, -1.519157896661415, -0.979122525595033, -0.3907944710350225, 0.2218567860582173, 0.8322023232021336, 1.412009311698736, 1.93270308087348, 2.366794601022297, 2.68941544045314, 2.879885937927452, 2.923227790112885, 2.811520904356931, 2.544997727757683, 2.132767850233479, 1.593072879201878, 0.9529875020075376, 0.2475079419799751, -0.4819963039138704, -1.189948264383619, -1.829277469637545, -2.354558440913892, -2.725395418216521, -2.909824902622844, -2.887467428062323, -2.65213571465495, -2.213603788941316 + ], + "label": "2P", + "angular_momentum": 1 + } + ], + "ae_local_potential": [ + -88810.10990181135, -87706.67963584395, -86616.95640079046, -85540.7699251782, -84477.95205268126, -83428.3367158445, -82391.7599101366, -81368.05966832254, -80357.07603515715, -79358.65104239095, -78372.62868408755, -77398.85489224755, -76437.17751273514, -75487.4462815032, -74549.5128011147, -73623.23051755535, -72708.45469733405, -71805.04240486765, -70912.85248014785, -70031.7455166839, -69161.5838397203, -68302.23148472465, -67453.5541761434, -66615.4193064207, -65787.69591527805, -64970.2546692519, -64162.9678414845, -63365.7092917668, -62578.35444682875, -61800.780280874, -61032.86529635725, -60274.48950500035, -59525.534409043, -58785.88298272815, -58055.41965401545, -57334.0302865243, -56621.6021616983, -55918.02396119395, -55223.1857494863, -54536.97895669125, -53859.2963616016, -53190.03207493355, -52529.08152278075, 
-51876.34143027525, -51231.7098054498, -50595.08592330175, -49966.37031005476, -49345.464727615385, -48732.272158223495, -48126.6967892929, -47528.64399844051, -46938.020338701455, -46354.73352392751, -45778.69241436779, -45209.807002427064, -44647.9883986029, -44093.148817595924, -43545.20156459295, -43004.061021721485, -42469.64263467123, -41941.86289948234, -41420.639349497906, -40905.890542478264, -40397.53604787559, -39895.49643426638, -39399.69325694, -38910.049045642, -38426.487292468555, -37948.932439912656, -37477.309869057455, -37011.54588791739, -36551.567719923274, -36097.30349255134, -35648.68222609249, -35205.63382256155, -34768.08905474496, -34335.97955538324, -33909.23780648859, -33487.79712879538, -33071.59167134092, -32660.556401176524, -32254.62709320576, -31853.7403201492, -31457.83344263375, -31066.84459940514, -30680.71269766206, -30299.377403510083, -29922.77913253465, -29550.85904049066, -29183.559014107916, -28820.82166201106, -28462.590305751834, -28108.808970953014, -27759.42237856238, -27414.37593621504, -27073.61572970346, -26737.08851455332, -26404.741707703713, -26076.523379291077, -25752.382244535074, -25432.267655725165, -25116.12959430685, -24803.918663066193, -24495.58607841138, -24191.08366275001, -23890.363836961675, -23593.37961296313, -23300.084586366716, -23010.43292922931, -22724.37938289175, -22441.879250907074, -22162.88839205655, -21887.36321345251, -21615.26066372707, -21346.53822630489, -21081.15391276035, -20819.06625625625, -20560.234305065074, -20304.617616169755, -20052.176248944626, -19802.87075891451, -19556.66219159147, -19313.512076388164, -19073.38242060654, -18836.235703501654, -18602.0348704188, -18370.74332700372, -18142.32493348461, -17916.74399902531, -17693.965276148425, -17473.95395522786, -17256.675659049815, -17042.096437441283, -16830.1827619651, -16620.90152068128, -16414.220012973026, -16210.10594443737, -16008.52742183893, -15809.452948126685, -15612.85141751232, -15418.69211061014, 
-15226.944689636824, -15037.57919367132, -14850.566033973246, -14665.875989359574, -14483.48020163898, -14303.35017110243, -14125.4577520702, -13949.775148493974, -13776.27490961373, -13604.92992566854, -13435.71342366038, -13268.598963171145, -13103.560432231056, -12940.572043238575, -12779.60832893121, -12620.64413840621, -12463.65463319045, -12308.615283359644, -12155.5018637053, -12004.29044994963, -11854.957415007246, -11707.479425293455, -11561.83343707825, -11417.99669288573, -11275.946717938255, -11135.66131664463, -10997.11856913205, -10860.296827821076, -10725.17471404316, -10591.731114700186, -10459.945178965505, -10329.796315025935, -10201.26418686437, -10074.328711081986, -9948.9700537605, -9825.16862736282, -9702.9050876726, -9582.160330771605, -9462.915490054706, -9345.151933282015, -9228.85125966742, -9113.99529700354, -9000.566098822264, -8888.545941590544, -8777.917321941155, -8668.662953937615, -8560.76576637339, -8454.208900104404, -8348.97570541473, -8245.04973941518, -8142.414763473915, -8041.054740679275, -7940.9538333339, -7842.09640048004, -7744.46699545577, -7648.05036348126, -7552.83143927531, -7458.795344701315, -7365.92738644253, -7274.213053706295, -7183.638015956615, -7094.188120675015, -7005.84939114924, -6918.6080242893, -6832.45038847078, -6747.363021404825, -6663.332628034705, -6580.346078458395, -6498.390405877015, -6417.4528045688, -6337.5206278881, -6258.58138628945, -6180.622745375935, -6103.632523972025, -6027.59869222022, -5952.50936970133, -5878.35282357824, -5805.117466762565, -5732.79185610418, -5661.36469060324, -5590.82480964436, -5521.161191252755, -5452.362950372105, -5384.419337163665, -5317.319735326705, -5251.053660439615, -5185.610758321765, -5120.98080341561, -5057.153697189, -4994.119466557246, -4931.8682623248105, -4870.390357646415, -4809.67614650718, -4749.716142221683, -4690.500975951687, -4632.021395242256, -4574.268262576049, -4517.232553945584, -4460.90535744323, -4405.2778718687805, -4350.341405354196, 
-4296.087374005543, -4242.50730056177, -4189.592813070121, -4137.335643578047, -4085.7276268413216, -4034.7606990482604, -3984.4268965597266, -3934.7183546648484, -3885.627306352166, -3837.1460810960266, -3789.26710365809, -3741.982892903746, -3695.2860606331205, -3649.169310426774, -3603.6254365055793, -3558.647322604881, -3514.22794086259, -3470.360350721077, -3427.037697842739, -3384.2532130390623, -3342.000211212889, -3300.272090313971, -3259.062330307369, -3218.364492154736, -3178.172216808253, -3138.4792242170497, -3099.279312345956, -3060.566356206483, -3022.3343068998265, -2984.577190671738, -2947.2891079791734, -2910.4642325684995, -2874.096810565213, -2838.1811595748964, -2802.7116677954054, -2767.682793140036, -2733.089062371648, -2698.9250702474756, -2665.185478674622, -2631.8650158760106, -2598.958475566726, -2566.4607161405706, -2534.366659866723, -2502.6712920964214, -2471.369660479437, -2440.456874190342, -2409.9281031643663, -2379.7785773427477, -2350.003585927464, -2320.5984766452293, -2291.5586550206417, -2262.8795836583463, -2234.5567815341155, -2206.5858232947794, -2178.962338566807, -2151.682011273496, -2124.740578960678, -2098.1338321307535, -2071.85761358504, -2045.9078177742745, -2020.2803901571986, -1994.971326567114, -1969.976672586303, -1945.292522928234, -1920.9150208274284, -1896.8403574369515, -1873.064771233333, -1849.584547428934, -1826.396017391583, -1803.495558071467, -1780.8795914350965, -1758.5445839063475, -1736.487045814427, -1714.703530848717, -1693.1906355203805, -1671.9449986306774, -1650.9633007458756, -1630.242263678694, -1609.7786499761955, -1589.569262414053, -1569.6109434970824, -1549.9005749660134, -1530.43507731037, -1511.211409287413, -1492.22656744708, -1473.4775856628105, -1454.961534668216, -1436.675521599523, -1418.6166895436716, -1400.782217092079, -1383.1693178999185, -1365.7752402508954, -1348.597266627436, -1331.632713286208, -1314.878929838939, -1298.3332988384525, -1281.99323536982, -1265.8561866466425, 
-1249.919631612323, -1234.1810805463106, -1218.6380746752516, -1203.288185788959, -1188.1290158611835, -1173.1581966750775, -1158.373389453347, -1143.7722844929815, -1129.352600804547, -1115.11208575596, -1101.048514720692, -1087.1596907303644, -1073.4434441316484, -1059.897632247464, -1046.520139042364, -1033.3088747921036, -1020.261775757322, -1007.376803861288, -994.65194637164, -982.0852155861145, -969.6746485221705, -957.4183066104845, -945.3142753922665, -933.3606642203305, -921.555605963915, -909.897256717144, -898.3837955111585, -887.0134240297995, -875.784366328857, -864.694868558798, -853.7431986909716, -842.9276462471975, -832.24652203275, -821.698157872646, -811.2809063512435, -800.993140555058, -790.833253818808, -780.7996594746135, -770.890790604321, -761.105099794917, -751.441058896988, -741.8971587862, -732.471909127738, -723.1638381436925, -713.9714923833415, -704.893436496294, -695.928253008469, -687.074542100861, -678.330921391066, -669.6960257175325, -661.168506926501, -652.7470336615985, -644.430291156064, -636.216981027551, -628.105821075505, -620.095545081053, -612.1849026093965, -604.3726588146635, -596.6575942471985, -589.038504663252, -581.514200837036, -574.083508375139, -566.7452675332385, -559.498333035102, -552.34157389385, -545.273873235438, -538.294128124344, -531.401249391425, -524.5941614639155, -517.8718021975525, -511.233122710778, -504.677087221017, -498.20267288298885, -491.8088696290292, -485.494680011405, -479.2591190465844, -473.1012140614463, -467.02000454140443, -461.014541980416, -455.0838897328541, -449.2271228672217, -443.44332802168225, -437.7316032613786, -432.09105793753236, -426.52081254827624, -421.01999860122703, -415.58775847774314, -410.22324529888414, -404.92562279300734, -399.69406516501664, -394.5277569672218, -389.42589297179126, -384.3876780447832, -379.41232702172346, -374.49906458472367, -369.64712514110715, -364.85575270352734, -360.12420077156236, -355.4517322147631, -350.83761915712796, 
-346.2811428630034, -341.7815936243704, -337.3382706495141, -332.9504819530525, -328.61754424729804, -324.33878283494965, -320.11353150308315, -315.94113241842894, -311.82093602391996, -307.7523009364883, -303.7345938460956, -299.76718941597994, -295.8494701841035, -291.98082646578155, -288.1606562574777, -284.3883651417475, -280.6633661933172, -276.9850798862805, -273.3529340023925, -269.76636354045155, -266.2248106267512, -262.72772442658453, -259.2745610567882, -255.86478349931105, -252.49786151578914, -249.17327156311504, -245.8904967099872, -242.6490265544232, -239.4483571422226, -236.28799088636364, -233.16743648732606, -230.08620885431705, -227.0438290273933, -224.0398241004627, -221.07372714515094, -218.1450771355264, -215.25341887365695, -212.3983029160031, -209.5792855006189, -206.79592847515525, -204.0477992256558, -201.3344706061251, -198.6555208688631, -196.01053359555374, -193.39909762908965, -190.8208070061303, -188.27526089037494, -185.76206350654124, -183.2808240750414, -180.8311567473386, -178.4126805419789, -176.0250192812858, -173.66780152870496, -171.34066052679404, -169.0432341358406, -166.77516477310596, -164.53609935267946, -162.32568922593646, -160.1435901225917, -157.98946209233625, -155.86296944705305, -153.76378070359814, -151.6915685271453, -149.64600967508, -147.62678494143884, -145.6335791018865, -143.6660808592224, -141.72398278940895, -139.80698128811764, -137.91477651778584, -136.0470723551747, -134.2035763394272, -132.38399962061985, -130.58805690879785, -128.8154664234985, -127.065949843747, -125.33923225852885, -123.63504211773285, -121.9531111835577, -120.29317448238325, -118.65497025709985, -117.0382399198975, -115.44272800550725, -113.86818212489715, -112.31435291941835, -110.78099401540246, -109.2678619792066, -107.77471627270805, -106.30131920924686, -104.847435910016, -103.41283426090214, -101.9972848697738, -100.6005610242226, -99.22243864975525, -97.86269626844074, -96.5211149580138, -95.19747831143636, 
-93.8915723969226, -92.60318571842805, -91.3321091766081, -90.0781360302474, -88.8410618581683, -87.62068452161886, -86.41680412714776, -85.22922298996905, -84.05774559782515, -82.90217857534995, -81.7623306489408, -80.638012612144, -79.5290372915609, -78.4352195132798, -77.35637606984194, -76.2923256877473, -75.24288899550785, -74.2078884922541, -73.18714851690444, -72.1804952179022, -71.18775652352905, -70.2087621128016, -69.24334338695955, -68.2913334415509, -67.35256703912295, -66.4268805825264, -65.5141120888388, -64.6141011639142, -63.726688977567, -62.85171823939525, -61.9890331752508, -61.1384795043616, -60.29990441711165, -59.4731565534857, -58.6580859821812, -57.8545441803941, -57.06238401428235, -56.28145972010965, -55.51162688607375, -54.752742434821, -54.00466460665095, -53.26725294340925, -52.54036827307255, -51.823872695024, -51.1176295660179, -50.4215034868347, -49.735360289619535, -49.05906702590694, -48.39249195532174, -47.73550453495601, -47.08797540941377, -46.4497764015175, -45.820780503668324, -45.200861869851764, -44.589895808279195, -43.98775877465418, -43.39432836605188, -42.80948331539963, -42.23310348654327, -41.665069869886736, -41.10526457858672, -40.55357084528729, -40.00987301937518, -39.474056564738206, -38.946008058005454, -38.42561518724948, -37.91276675112779, -37.407352658441134, -36.909263928084854, -36.418392689368545, -35.934632182678826, -35.45787676045822, -34.98802188847424, -34.52496414735015, -34.068601234328995, -33.618831965242876, -33.175556276656906, -32.73867522815857, -32.30809100476228, -31.883706919397845, -31.465427415453156, -31.053158069338945, -30.64680559304541, -30.246277836656386, -29.851483790804323, -29.462333589007613, -29.07873850988746, -28.700610979220464, -28.32786457179993, -27.960414013077155, -27.59817518055457, -27.241065104902386, -26.889001970773077, -26.541905117287065, -26.1996950381654, -25.86229338148581, -25.529622949039574, -25.20160769526821, -24.878172725760393, -24.55924429529074, 
-24.24474980538398, -23.934617801388885, -23.62877796904907, -23.32716113055862, -23.029699240092263, -22.73632537880269, -22.44697374927726, -22.161579669450877, -21.880079565971716, -21.602410967018706, -21.328512494572603, -21.05832385614257, -20.791785835954226, -20.52884028560481, -20.2694301141942, -20.01349927794246, -19.76099276930476, -19.511856605598464, -19.26603781715682, -19.0234844350258, -18.784145478223145, -18.547970940577976, -18.31491177717288, -18.08491989040979, -17.857948115722866, -17.63395020696286, -17.412880821477334, -17.194695504913085, -16.979350675766796, -16.766803609710934, -16.55701242372256, -16.34993606004227, -16.145534269991725, -15.943767597677455, -15.744597363608735, -15.547985648257995, -15.353895275590466, -15.162289796590985, -14.97313347281446, -14.786391259985844, -14.60202879167582, -14.4200123630761, -14.240308914898986, -14.06288601742388, -13.88771185471235, -13.71475520901361, -13.54398544537907, -13.375372496505506, -13.20888684782391, -13.04449952284972, -12.88218206881013, -12.72190654256107, -12.56364549680667, -12.40737196663191, -12.253059456357365, -12.10068192672514, -11.95021378242177, -11.801629859944345, -11.654905415813674, -11.510016115137285, -11.366938020524515, -11.225647581353446, -11.08612162338998, -10.948337338756724, -10.81227227624911, -10.677904331995645, -10.54521174045688, -10.41417306575858, -10.284767193352545, -10.15697332199813, -10.030770956057575, -9.9061398980965, -9.783060241781556, -9.66151236506625, -9.54147692365535, -9.422934844738895, -9.305867320985485, -9.19025580478538, -9.076082002733164, -8.963327870339786, -8.851975606964265, -8.74200765095463, -8.63340667498845, -8.52615558160323, -8.42023749890658, -8.31563577645748, -8.21233398130889, -8.110315894203085, -8.00956550591123, -7.910067013708435, -7.811804817976995, -7.71476351892955, -7.61892791344531, -7.52428299201224, -7.43081393576852, -7.33850611363734, -7.247345079548795, -7.157316569743645, -7.068406500153595, 
-6.98060096385319, -6.893886228579, -6.80824873431157, -6.72367509091645, -6.640152075840655, -6.557666631860915, -6.476205864881055, -6.395757041775215, -6.3163075882746, -6.23784508689523, -6.1603572749044, -6.083832042324065, -6.008257429969045, -5.93362162753279, -5.85991297165738, -5.7871199440584, -5.715231169772215, -5.644235415266565, -5.57412158668168, -5.50487872807456, -5.43649601968688, -5.368962776239235, -5.302268445247005, -5.236402605360425, -5.17135496472673, -5.10711535937405, -5.043673751616835, -4.981020228481897, -4.919145000155103, -4.858038398448032, -4.7976908752842355, -4.738093001204957, -4.679235463893686, -4.621109066719547, -4.563704727298929, -4.507013476075323, -4.4510264549169865, -4.395734915732099, -4.341130219101372, -4.287203832927705, -4.233947331102224, -4.181352392188991, -4.129410798062006, -4.078114432800065, -4.027455281241013, -3.977425427787702, -3.928017055172444, -3.8792224432354145, -3.8310339677183793, -3.7834440990732787, -3.7364454012857276, -3.690030530713109, -3.64419223493704, -3.5989233516302126, -3.5542168074371965, -3.510065616869231, -3.466462881212726, -3.423401787451282, -3.380875607201182, -3.338877695660017, -3.297401490568437, -3.2564405111848047, -3.2159883572725096, -3.1760387080999757, -3.136585321452981, -3.097622032659319, -3.05914275362557, -3.021141471885786, -2.9836122496620723, -2.9465492229367585, -2.90994660053615, -2.873798663225663, -2.8380997628161433, -2.8028443212813716, -2.7680268298864417, -2.733641848327035, -2.6996840038793724, -2.666147990560683, -2.6330285683001713, -2.600320562120219, -2.5680188613277934, -2.53611841871591, -2.5046142497749657, -2.473501431913934, -2.442775103691175, -2.4124304640548346, -2.382462771592692, -2.3528673437912655, -2.3236395563042076, -2.2947748422297063, -2.266268691396913, -2.2381166496612335, -2.210314318208331, -2.182857352866839, -2.155741463429543, -2.1289624129830473, -2.1025160172457555, -2.076398143914046, -2.050604712016621, 
-2.0251316912768216, -1.9999751014828995, -1.9751310118661165, -1.9505955404865234, -1.9263648536264335, -1.9024351651913685, -1.8788027361184876, -1.85546387379236, -1.832414931467962, -1.809652307700895, -1.787172445784629, -1.7649718331947655, -1.7430470010402166, -1.721394523521152, -1.700011017393743, -1.6788931414414965, -1.658037595953196, -1.6374411222073235, -1.6171005019628515, -1.5970125569564175, -1.5771741484056825, -1.5575821765189104, -1.5382335800106166, -1.5191253356232215, -1.500254457654681, -1.4816179974919435, -1.463213043150231, -1.4450367188180444, -1.4270861844077904, -1.4093586351120355, -1.3918513009652276, -1.3745614464108915, -1.3574863698741995, -1.3406234033398285, -1.323969911935093, -1.307523293518225, -1.2912809782717904, -1.275240428301158, -1.259399137237928, -1.243754629848328, -1.2283044616464336, -1.2130462185122206, -1.1979775163143604, -1.183096000537675, -1.1683993459152604, -1.1538852560651416, -1.1395514631314625, -1.125395727430138, -1.1114158370988805, -1.09760960775161, -1.0839748821371236, -1.0705095298020244, -1.0572114467578444, -1.044078555152271, -1.031108802944499, -1.018300163584578, -1.0056506356967656, -0.9931582427668125, -0.980821032833118, -0.9686370781817425, -0.9566044750451865, -0.9447213433049265, -0.932985826197651, -0.9213960900251205, -0.9099503238676645, -0.898646739301208, -0.8874835701178315, -0.876459072049801, -0.8655715224970135, -0.8548192202578455, -0.8442004852633285, -0.8337136583146385, -0.8233571008238465, -0.813129194557874, -0.803028341385655, -0.7930529630284056, -0.783201500813067, -0.773472415428586, -0.763864185034308, -0.75437531177394, -0.745004311183221, -0.7357497190249515, -0.7266100892509525, -0.717583993776124, -0.708670022255245, -0.6998667801775466, -0.691172895535166, -0.682587008050037, -0.6741077761608245, -0.6657338749716095, -0.657463998468357, -0.6492968493773575, -0.6412311542509395, -0.633265652807212, -0.625399100419775, -0.617630266693912, -0.609957940300261, 
-0.602380921075735, -0.5948980250956635, -0.5875080831423285, -0.580209940522264, -0.5730024568858375, -0.5658845060490705, -0.5588549758176605, -0.5519127678132105, -0.545056797301596, -0.538285993023475, -0.5315992970269064, -0.524995664502036, -0.5184740636190275, -0.5120334753620045, -0.5056728933772465, -0.4993913238108787, -0.493187785154875, -0.48706130809370085, -0.4810109353528506, -0.4750357215492725, -0.46913473304365516, -0.46330704779453685, -0.45755175521424307, -0.4518679560265977, -0.4462547621264129, -0.44071129644072377, -0.43523669279173643, -0.42983009576149495, -0.42449066055821216, -0.41921755288427376, -0.4140099488058791, -0.4088670346242944, -0.40378800674871673, -0.39877207157070693, -0.39381844534018706, -0.3889263540429823, -0.3840950332798729, -0.3793237281471615, -0.3746116931187133, -0.3699581919294676, -0.36536249746039734, -0.3608238916248903, -0.35634166525655225, -0.35191511799839337, -0.34754355819339794, -0.3432263027764536, -0.33896267716761674, -0.3347520151667131, -0.33059365884923736, -0.32648695846355424, -0.32243127232937446, -0.3184259667374873, -0.314470415850746, -0.3105640016062764, -0.3067061136189045, -0.30289614908578455, -0.2991335126922057, -0.2954176165185775, -0.2917478799485621, -0.2881237295783538, -0.2845445991270849, -0.28100992934833957, -0.2775191679427746, -0.27407176947181777, -0.2706671952724438, -0.2673049133730082, -0.2639843984101228, -0.2607051315465699, -0.25746660039022984, -0.2542682989140202, -0.25110972737682913, -0.247990392245427, -0.24490980611735405, -0.24186748764475974, -0.2388629614591927, -0.2358957580973246, -0.23296541392759235, -0.23007147107775855, -0.2272134773633647, -0.22439098621707784, -0.22160355661891495, -0.2188507530273291, -0.21613214531115965, -0.2134473086824193, -0.21079582362992216, -0.2081772758537349, -0.20559125620043944, -0.20303736059920424, -0.2005151899986454, -0.19802435030447474, -0.19556445231792335, -0.1931351116749251, -0.19073594878606295, 
-0.18836658877725396, -0.18602666143117574, -0.1837158011294207, -0.1814336467953645, -0.17917984183775004, -0.1769540340949669, -0.17475587578002655, -0.17258502342622084, -0.17044113783345194, -0.1683238840152352, -0.16623293114635335, -0.16416795251116634, -0.1621286254525618, -0.16011463132153714, -0.1581256554274126, -0.1561613869886577, -0.15422151908433224, -0.1523057486061302, -0.150413776211016, -0.14854530627445425, -0.1467000468442157, -0.14487770959475976, -0.1430780097821843, -0.1413006661997315, -0.1395454011338511, -0.1378119403208052, -0.13610001290381474, -0.1344093513907384, -0.13273969161227456, -0.13109077268068636, -0.1294623369490358, -0.1278541299709264, -0.1262659004607465, -0.12469740025440325, -0.1231483842705489, -0.12161861047228414, -0.12010783982934035, -0.1186158362807315, -0.11714236669786705, -0.1156872008481274, -0.11425011135888764, -0.11283087368199116, -0.1114292660586639, -0.1100450694848627, -0.10867806767705715, -0.1073280470384334, -0.1059947966255198, -0.1046781081152273, -0.1033777757722968, -0.1020935964171546, -0.10082536939416364, -0.09957289654027116, -0.0983359821540459, -0.0971144329650981, -0.0959080581038822, -0.09471666907187146, -0.09354007971210555, -0.0923781061801036, -0.09123056691513685, -0.0900972826118608, -0.0889780761922972, -0.0878727727781658, -0.08678119966355965, -0.0857031862879585, -0.08463856420957955, -0.0835871670790568, -0.0825488306134494, -0.0815233925705724, -0.08051069272364506, -0.07951057283625615, -0.0785228766376385, -0.0775474497982513, -0.07658413990566675, -0.075632796440754, -0.0746932707541614, -0.0737654160430887, -0.0728490873283495, -0.07194414143171815, -0.07105043695355685, -0.07016783425072295 + ], + "aug_multipoles": [ + -0.0981138722783857, -0.0958004681236609, 0.0, 0.0, -0.0958004681236609, -0.08747544885960724, 0.0, 0.0, 0.0, 0.0, 0.2390827668467262, 0.1572942928639029, 0.0, 0.0, 0.1572942928639029, 0.1034693219640171, 0.0, 0.0, -0.05654096143957252, -0.03802282243047831, 
0.0, 0.0, -0.02978342638786141, -0.02033850149835275, -0.05654096143957252, -0.02978342638786141, 0.0, 0.0, -0.03802282243047831, -0.02033850149835275, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07621920167682379, 0.04918892796424668, 0.0, 0.0, 0.04918892796424668, 0.03169165342376241 + ] + }, + "header": { + "pseudo_type": "PAW", + "number_of_proj": 4, + "cutoff_radius_index": 781, + "z_valence": 7.0, + "mesh_size": 1105, + "element": "F", + "number_of_wfc": 2, + "paw_core_energy": -70.2255724304084, + "core_correction": true + }, + "augmentation": [ + { + "i": 0, + "radial_function": [ + -4.687793097543027e-08, -4.806465136610316e-08, -4.92814137222953e-08, -5.052897855976923e-08, -5.180812564682486e-08, -5.311965449167879e-08, -5.446438484218225e-08, -5.584315719818945e-08, -5.7256833336896e-08, -5.870629685147723e-08, -6.01924537033611e-08, -6.171623278848264e-08, -6.327858651787297e-08, -6.488049141294582e-08, -6.652294871585443e-08, -6.820698501529826e-08, -6.993365288817341e-08, -7.170403155746598e-08, -7.351922756679943e-08, -7.538037547205959e-08, -7.728863855052633e-08, -7.924520952795833e-08, -8.125131132408343e-08, -8.330819781696111e-08, -8.541715462669552e-08, -8.757949991898656e-08, -8.979658522902484e-08, -9.206979630624236e-08, -9.440055398044801e-08, -9.67903150498907e-08, -9.924057319180145e-08, -1.017528598959881e-07, -1.043287454220627e-07, -1.069698397809014e-07, -1.096777937409508e-07, -1.124542998600073e-07, -1.153010935431171e-07, -1.182199541272572e-07, -1.212127059934735e-07, -1.242812197071756e-07, -1.27427413187294e-07, -1.306532529050386e-07, -1.339607551130006e-07, -1.373519871053711e-07, -1.40829068510062e-07, -1.443941726135345e-07, -1.480495277191688e-07, -1.517974185400189e-07, -1.556401876268253e-07, -1.595802368321789e-07, -1.636200288117472e-07, -1.677620885635077e-07, -1.720090050059443e-07, -1.763634325961956e-07, -1.808280929891697e-07, -1.854057767386539e-07, -1.900993450414929e-07, -1.94911731525918e-07, 
-1.998459440851484e-07, -2.049050667574108e-07, -2.100922616535486e-07, -2.154107709334321e-07, -2.208639188323988e-07, -2.264551137389931e-07, -2.321878503253053e-07, -2.380657117312372e-07, -2.440923718040639e-07, -2.5027159739469e-07, -2.566072507120328e-07, -2.631032917370108e-07, -2.697637806976364e-07, -2.765928806067689e-07, -2.835948598641091e-07, -2.907740949240623e-07, -2.981350730311411e-07, -3.056823950246075e-07, -3.134207782141204e-07, -3.213550593281765e-07, -3.294901975371907e-07, -3.378312775531062e-07, -3.463835128074676e-07, -3.551522487099484e-07, -3.641429659893677e-07, -3.733612841192805e-07, -3.828129648302916e-07, -3.925039157112752e-07, -4.02440193901764e-07, -4.12628009877808e-07, -4.230737313336702e-07, -4.33783887161789e-07, -4.447651715334867e-07, -4.560244480829845e-07, -4.675687541973321e-07, -4.794053054149342e-07, -4.915414999354293e-07, -5.039849232437236e-07, -5.167433528510909e-07, -5.298247631562856e-07, -5.432373304297131e-07, -5.569894379237765e-07, -5.710896811125796e-07, -5.855468730642802e-07, -6.003700499494353e-07, -6.155684766887901e-07, -6.311516527440397e-07, -6.471293180551717e-07, -6.635114591281183e-07, -6.803083152765093e-07, -6.975303850214314e-07, -7.151884326531978e-07, -7.332934949592135e-07, -7.518568881221618e-07, -7.708902147928119e-07, -7.904053713418646e-07, -8.104145552953805e-07, -8.309302729584204e-07, -8.519653472316796e-07, -8.735329256259932e-07, -8.956464884797161e-07, -9.183198573841293e-07, -9.415672038221125e-07, -9.654030580255118e-07, -9.898423180567112e-07, -1.014900259120105e-06, -1.040592543109279e-06, -1.066935228395852e-06, -1.093944779866141e-06, -1.121638079211866e-06, -1.150032435481368e-06, -1.179145595897921e-06, -1.208995756951876e-06, -1.239601575773616e-06, -1.270982181794379e-06, -1.303157188702278e-06, -1.336146706700967e-06, -1.369971355078606e-06, -1.404652275095002e-06, -1.440211143194978e-06, -1.476670184556193e-06, -1.514052186979943e-06, -1.552380515133534e-06, 
-1.591679125153233e-06, -1.631972579616822e-06, -1.673286062895188e-06, -1.715645396892498e-06, -1.759077057184807e-06, -1.80360818956719e-06, -1.84926662701973e-06, -1.896080907102968e-06, -1.944080289793695e-06, -1.993294775772188e-06, -2.043755125172389e-06, -2.095492876806675e-06, -2.148540367877273e-06, -2.202930754186634e-06, -2.258698030859345e-06, -2.315877053588623e-06, -2.374503560420567e-06, -2.434614194089837e-06, -2.496246524920723e-06, -2.559439074307848e-06, -2.624231338791266e-06, -2.690663814740925e-06, -2.758778023665952e-06, -2.828616538164596e-06, -2.900223008530932e-06, -2.973642190035162e-06, -3.048919970894289e-06, -3.126103400950872e-06, -3.20524072107767e-06, -3.286381393326497e-06, -3.369576131840307e-06, -3.454876934547586e-06, -3.542337115659036e-06, -3.632011338986802e-06, -3.723955652106936e-06, -3.818227521386728e-06, -3.914885867898414e-06, -4.013991104242011e-06, -4.115605172300151e-06, -4.21979158194841e-06, -4.326615450745575e-06, -4.436143544628317e-06, -4.548444319635952e-06, -4.663587964691271e-06, -4.781646445464006e-06, -4.902693549344693e-06, -5.026804931556605e-06, -5.154058162434838e-06, -5.28453277590207e-06, -5.418310319170999e-06, -5.555474403704981e-06, -5.696110757468153e-06, -5.840307278498122e-06, -5.988154089834469e-06, -6.139743595837279e-06, -6.295170539931238e-06, -6.454532063810864e-06, -6.617927768144242e-06, -6.785459774813069e-06, -6.957232790727624e-06, -7.133354173257057e-06, -7.313933997315183e-06, -7.499085124144259e-06, -7.688923271839393e-06, -7.88356708765751e-06, -8.083138222156525e-06, -8.287761405210339e-06, -8.497564523947667e-06, -8.71267870266316e-06, -8.933238384750476e-06, -9.159381416709046e-06, -9.391249134276279e-06, -9.62898645073947e-06, -9.872741947482449e-06, -1.012266796682313e-05, -1.037892070720066e-05, -1.064166032077067e-05, -1.091105101347031e-05, -1.118726114761514e-05, -1.147046334709178e-05, -1.176083460521262e-05, -1.205855639529898e-05, -1.236381478406249e-05, 
-1.267680054785516e-05, -1.29977092918604e-05, -1.332674157230013e-05, -1.366410302173323e-05, -1.401000447752434e-05, -1.436466211356287e-05, -1.472829757531395e-05, -1.510113811828669e-05, -1.548341675000473e-05, -1.587537237556869e-05, -1.627724994690112e-05, -1.668930061576624e-05, -1.711178189066139e-05, -1.754495779767648e-05, -1.798909904542277e-05, -1.844448319413362e-05, -1.891139482904206e-05, -1.939012573814453e-05, -1.98809750944601e-05, -2.038424964289996e-05, -2.090026389186323e-05, -2.142934030967839e-05, -2.197180952601313e-05, -2.252801053837772e-05, -2.309829092385104e-05, -2.368300705616091e-05, -2.428252432825392e-05, -2.48972173804939e-05, -2.552747033463051e-05, -2.617367703368444e-05, -2.683624128789799e-05, -2.751557712690468e-05, -2.821210905827478e-05, -2.89262723325976e-05, -2.965851321526599e-05, -3.040928926513197e-05, -3.117906962020691e-05, -3.196833529058456e-05, -3.277757945876855e-05, -3.360730778759205e-05, -3.445803873592061e-05, -3.533030388233495e-05, -3.622464825699492e-05, -3.714163068189115e-05, -3.808182411969607e-05, -3.90458160314313e-05, -4.003420874317345e-05, -4.104761982202715e-05, -4.208668246159798e-05, -4.315204587720583e-05, -4.424437571108416e-05, -4.536435444781621e-05, -4.651268184026758e-05, -4.76900753462785e-05, -4.889727057638781e-05, -5.013502175286706e-05, -5.140410218034829e-05, -5.270530472833961e-05, -5.403944232592646e-05, -5.540734846896661e-05, -5.680987774009345e-05, -5.824790634184938e-05, -5.97223326432817e-05, -6.123407774033781e-05, -6.27840860304092e-05, -6.437332580137941e-05, -6.600278983554066e-05, -6.767349602875481e-05, -6.938648802524053e-05, -7.114283586838107e-05, -7.294363666795576e-05, -7.479001528420658e-05, -7.668312502916601e-05, -7.862414838567737e-05, -8.061429774455432e-05, -8.2654816160335e-05, -8.474697812609725e-05, -8.689209036781534e-05, -8.909149265874766e-05, -9.134655865435969e-05, -9.365869674829718e-05, -9.602935094993782e-05, -9.846000178406452e-05, 
-0.0001009521672132128, -0.0001035074035832646, -0.0001061273065928683, -0.0001088135122872844, -0.0001115676980772698, -0.0001143915837836258, -0.0001172869327080549, -0.0001202555527309868, -0.0001232992974370442, -0.000126420067268847, -0.0001296198107098582, -0.0001329005254970029, -0.000136264259863803, -0.0001397131138147899, -0.0001432492404319811, -0.0001468748472142166, -0.0001505921974501813, -0.0001544036116259517, -0.0001583114688679309, -0.0001623182084220516, -0.0001664263311701578, -0.0001706384011844872, -0.0001749570473212071, -0.0001793849648539772, -0.0001839249171485332, -0.0001885797373793181, -0.0001933523302892031, -0.0001982456739933721, -0.0002032628218284702, -0.0002084069042481355, -0.0002136811307660731, -0.0002190887919478495, -0.0002246332614526142, -0.0002303179981259935, -0.0002361465481454157, -0.000242122547219178, -0.0002482497228405784, -0.0002545318965984793, -0.0002609729865456984, -0.0002675770096266541, -0.0002743480841657349, -0.0002812904324178878, -0.0002884083831829622, -0.0002957063744853869, -0.0003031889563207766, -0.000310860793471135, -0.0003187266683903281, -0.0003267914841615619, -0.000335060267528634, -0.0003435381720027672, -0.0003522304810468838, -0.0003611426113392162, -0.0003702801161181985, -0.0003796486886106282, -0.0003892541655451293, -0.0003991025307530107, -0.0004091999188586419, -0.0004195526190615346, -0.000430167079012367, -0.0004410499087852225, -0.0004522078849483999, -0.0004636479547361752, -0.0004753772403239678, -0.0004874030432094205, -0.0004997328487019407, -0.0005123743305233457, -0.0005253353555222755, -0.0005386239885051249, -0.000552248497186301, -0.0005662173572606612, -0.0005805392576010847, -0.0005952231055841647, -0.0006102780325470861, -0.0006257133993788405, -0.0006415388022489563, -0.0006577640784770478, -0.0006743993125465209, -0.0006914548422658514, -0.0007089412650809469, -0.0007268694445421609, -0.0007452505169296019, -0.0007640958980404903, -0.000783417290142351, 
-0.0008032266890959516, -0.0008235363916519569, -0.0008443590029253508, -0.0008657074440517916, -0.0008875949600301133, -0.0009100351277553068, -0.0009330418642463925, -0.0009566294350736662, -0.0009808124629899395, -0.001005605936770441, -0.001031025220266167, -0.001057086061675556, -0.001083804603039454, -0.001111197389964455, -0.00113928138157977, -0.0011680739607329, -0.001197592944429499, -0.001227856594522845, -0.001258883628658573, -0.001290693231480268, -0.001323305066101748, -0.001356739285851915, -0.001391016546298124, -0.001426158017554232, -0.001462185396879471, -0.001499120921574476, -0.00153698738218089, -0.00157580813599103, -0.001615607120874283, -0.001656408869426929, -0.001698238523452247, -0.001741121848777834, -0.00178508525041717, -0.00183015578808261, -0.001876361192057014, -0.00192372987943138, -0.001972290970715945, -0.002022074306832237, -0.002073110466493797, -0.002125430783983235, -0.002179067367333472, -0.002234053116921069, -0.002290421744479601, -0.002348207792541192, -0.00240744665431431, -0.002468174594006049, -0.002530428767597195, -0.002594247244078338, -0.002659669027155522, -0.002726734077433788, -0.002795483335087123, -0.002865958743023326, -0.002938203270552275, -0.003012260937566228, -0.00308817683924064, -0.003165997171264054, -0.003245769255605673, -0.003327541566828973, -0.003411363758959989, -0.003497286692918569, -0.003585362464520964, -0.003675644433062091, -0.003768187250485457, -0.003863046891148977, -0.003960280682194458, -0.004059947334528535, -0.004162106974422637, -0.004266821175739281, -0.004374152992791935, -0.004484166993845225, -0.004596929295262139, -0.004712507596304564, -0.004830971214592953, -0.004952391122230893, -0.005076839982599626, -0.005204392187827226, -0.005335123896936809, -0.005469113074677264, -0.005606439531039877, -0.005747184961463239, -0.00589143298772831, -0.006039269199544836, -0.006190781196829392, -0.006346058632674721, -0.006505193257008876, -0.006668278960941972, -0.006835411821797187, 
-0.007006690148821472, -0.007182214529570577, -0.007362087876961385, -0.007546415476983401, -0.007735305037059903, -0.007928866735047384, -0.008127213268860869, -0.008330459906710407, -0.008538724537932545, -0.008752127724398689, -0.008970792752479749, -0.009194845685545065, -0.00942441541697047, -0.009659633723628411, -0.00990063531983044, -0.01014755791168909, -0.01040054225186418, -0.0106597321946546, -0.01092527475139382, -0.01119732014610412, -0.01147602187136014, -0.01176153674430955, -0.01205402496279384, -0.01235365016150797, -0.01266057946813388, -0.01297498355937692, -0.01329703671683046, -0.01362691688258788, -0.01396480571451593, -0.01431088864109763, -0.01466535491574638, -0.01502839767048742, -0.0154002139688948, -0.01578100485816591, -0.0161709754202077, -0.01657033482160075, -0.01697929636229979, -0.01739807752291979, -0.01782690001044829, -0.01826598980221522, -0.01871557718794077, -0.01917589680967274, -0.01964718769941269, -0.02012969331421974, -0.02062366156856844, -0.02112934486372484, -0.02164700011389218, -0.02217688876886372, -0.02271927683290594, -0.02327443487958138, -0.02384263806220364, -0.02442416611960252, -0.02501930337685925, -0.02562833874065523, -0.0262515656888592, -0.02688928225395835, -0.02754179099992092, -0.02820939899205527, -0.02889241775941125, -0.02959116324924634, -0.03030595577305677, -0.03103711994365073, -0.03178498460271534, -0.03254988273830461, -0.03333215139164936, -0.03413213155266205, -0.03495016804348348, -0.03578660938938791, -0.03664180767633399, -0.03751611839441927, -0.0384099002664626, -0.03932351506090929, -0.0402573273882186, -0.04121170447985992, -0.04218701594901036, -0.0431836335320087, -0.04420193080958746, -0.04524228290686553, -0.04630506617104758, -0.04739065782573866, -0.04849943560074212, -0.04963177733617304, -0.05078806055967719, -0.05196866203550772, -0.05317395728417218, -0.05440432007132114, -0.05566012186451366, -0.05694173125645254, -0.0582495133532447, -0.05958382912620646, 
-0.06094503472569154, -0.06233348075539037, -0.06374951150550895, -0.0651934641432064, -0.06666566785863945, -0.06816644296493096, -0.06969609995035908, -0.07125493848103587, -0.07284324635232939, -0.07446129838726628, -0.0761093552801417, -0.07778766238355989, -0.07949644843712692, -0.0812359242360258, -0.08300628123771699, -0.08480769010502806, -0.08664029918392979, -0.08850423291433025, -0.09039959017227328, -0.09232644254198484, -0.09428483251628536, -0.09627477162397427, -0.09829623848289037, -0.1003491767774717, -0.1024334931597709, -0.1045490550730317, -0.1066956884971104, -0.108873175615211, -0.111081252401625, -0.1133196061304051, -0.1155878728051657, -0.1178856345105054, -0.1202124166858628, -0.1225676853229794, -0.1249508440885318, -0.1273612313739215, -0.1297981172746765, -0.1322607005024234, -0.1347481052329345, -0.1372593778943519, -0.1397934839003207, -0.1423493043334642, -0.1449256325853637, -0.1475211709600065, -0.1501345272485149, -0.152764211283874, -0.1554086314853526, -0.1580660914033365, -0.1607347862763913, -0.1634127996135343, -0.1660980998159177, -0.16878853685343, -0.1714818390130763, -0.1741756097374394, -0.1768673245730207, -0.1795543282498309, -0.1822338319152379, -0.1849029105467768, -0.1875585005703934, -0.1901973977124104, -0.1928162551153778, -0.1954115817498973, -0.1979797411564641, -0.2005169505533705, -0.2030192803487302, -0.2054826540967096, -0.2079028489400806, -0.2102754965832166, -0.2125960848416284, -0.2148599598160582, -0.217062328740989, -0.2191982635591802, -0.2212627052754461, -0.2232504691443588, -0.22515625074782, -0.2269746330194812, -0.2287000942737699, -0.2303270172977193, -0.231849699563909, -0.2332623646224987, -0.2345591747295502, -0.2357342447675285, -0.2367816575119581, -0.2376954802956554, -0.2384697831186661, -0.2390986582479309, -0.2395762413457315, -0.2398967341600166, -0.240054428802712, -0.2400437336339938, -0.2398592007611436, -0.2394955551499494, -0.2389477253345504, -0.2382108756980873, 
-0.2372804402814137, -0.236152158060386, -0.2348221096137915, -0.2332867550837686, -0.2315429733085361, -0.2295881019833723, -0.2274199786800521, -0.225036982527361, -0.2224380763258972, -0.2196228488392166, -0.2165915569705515, -0.2133451675000154, -0.2098853980215219, -0.2062147566819053, -0.2023365802871502, -0.1982550703026231, -0.1939753262361921, -0.1895033758555367, -0.1848462016545146, -0.1800117629487134, -0.17500901294812, -0.1698479101260836, -0.164539423179323, -0.1590955288548566, -0.1535292019075261, -0.147854396447599, -0.142086017943276, -0.1362398851591999, -0.1303326813411111, -0.1243818940001936, -0.1184057427103275, -0.1124230944093131, -0.1064533657929549, -0.1005164125107829, -0.09463240501584787, -0.08882169109037637, -0.08310464526571384, -0.07750150558031016, -0.07203219837476992, -0.06671615210885731, -0.06157210150218317, -0.05661788364782418, -0.05187022812525292, -0.04734454354396419, -0.04305470337911152, -0.03901283441139196, -0.03522911155004485, -0.03171156329329625, -0.02846589255654663, -0.02549531806471826, -0.02280044194924179, -0.02037914959782097, -0.01822654815967437, -0.01633495039155428, -0.01469391071906536, -0.01329032045999105, -0.01210856908550224, -0.01113077815315595, -0.01033711410262809, -0.009706185429812816, -0.009215528815129131, -0.008842187545304525, -0.00856338400304786, -0.008357286076366141, -0.008203865032473084, -0.008085839688964397, -0.007989698582519189, -0.007906788277111124, -0.007832570551128404, -0.007759808686770509, -0.007680794137338243, -0.007588757859692485, -0.007477919327846472, -0.00734352441654329, -0.007181871301995657, -0.006990323615708949, -0.006767310190870846, -0.006512310858683201, -0.006225827884074775, -0.005909342775144211, -0.005565258357585701, -0.005196826172719882, -0.004808059433633827, -0.004403631956570564, -0.003988763671465723, -0.00356909350409762, -0.003150540609610397, -0.002739155124229287, -0.002340962808278224, -0.001961808394883802, -0.001607180005996134, 
-0.001282016482447301, -0.0009905379385695134, -0.0007360643284918256, -0.0005208393651354726, -0.0003458582214568295, -0.0002107019523537312, -0.0001133810123592482, -5.019041237308564e-05, -1.557916568400985e-05, -2.036813376626867e-06, 2.775557561562891e-16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 
0, + "j": 0 + }, + { + "i": 0, + "radial_function": [ + -4.137138343801557e-08, -4.241870492481753e-08, -4.349253947763553e-08, -4.4593558277744e-08, -4.57224494974429e-08, -4.687991873018654e-08, -4.806668943160202e-08, -4.928350337167181e-08, -5.053112109836293e-08, -5.181032241299388e-08, -5.312190685763431e-08, -5.446669421484415e-08, -5.584552502006334e-08, -5.725926108697265e-08, -5.870878604615455e-08, -6.019500589738924e-08, -6.171884957593315e-08, -6.32812695331324e-08, -6.488324233173414e-08, -6.652576925626931e-08, -6.820987693888569e-08, -6.993661800102535e-08, -7.17070717113453e-08, -7.352234466029389e-08, -7.53835714517642e-08, -7.729191541225587e-08, -7.924856931799043e-08, -8.125475614043291e-08, -8.331172981068628e-08, -8.542077600323751e-08, -8.758321293954256e-08, -8.98003922119555e-08, -9.207369962851487e-08, -9.440455607911561e-08, -9.679441842360914e-08, -9.924478040238392e-08, -1.01757173569999e-07, -1.043331682524516e-07, -1.069743745286782e-07, -1.096824432369032e-07, -1.124590670064605e-07, -1.153059813157392e-07, -1.182249655769085e-07, -1.212178442481039e-07, -1.242864879737678e-07, -1.274328147538571e-07, -1.306587911426506e-07, -1.339664334779034e-07, -1.373578091411171e-07, -1.408350378497146e-07, -1.444002929819247e-07, -1.480558029352074e-07, -1.518038525190675e-07, -1.556467843831259e-07, -1.595870004813454e-07, -1.636269635733188e-07, -1.677691987635659e-07, -1.720162950797963e-07, -1.763709070911247e-07, -1.808357565672537e-07, -1.854136341796555e-07, -1.90107401245821e-07, -1.949199915176643e-07, -1.998544130151988e-07, -2.049137499066356e-07, -2.101011644360718e-07, -2.154198988999827e-07, -2.208732776737457e-07, -2.264647092894653e-07, -2.321976885664012e-07, -2.380757987953224e-07, -2.441027139781625e-07, -2.502822011243693e-07, -2.566181226053861e-07, -2.631144385687385e-07, -2.697752094132284e-07, -2.766045983267923e-07, -2.836068738886014e-07, -2.907864127370353e-07, -2.981477023051943e-07, -3.056953436256588e-07, 
-3.134340542062522e-07, -3.213686709786042e-07, -3.295041533213517e-07, -3.378455861598779e-07, -3.463981831445123e-07, -3.551672899091924e-07, -3.641583874126145e-07, -3.733770953639647e-07, -3.82829175735373e-07, -3.925205363632797e-07, -4.024572346409739e-07, -4.126454813046043e-07, -4.230916443150308e-07, -4.338022528379493e-07, -4.447840013247613e-07, -4.560437536967593e-07, -4.67588547635226e-07, -4.794255989801393e-07, -4.915623062402283e-07, -5.040062552171946e-07, -5.167652237469988e-07, -5.298471865611676e-07, -5.432603202711624e-07, -5.570130084789277e-07, -5.711138470168017e-07, -5.855716493200818e-07, -6.003954519355866e-07, -6.155945201696672e-07, -6.311783538791957e-07, -6.471566934091408e-07, -6.63539525680458e-07, -6.803370904320862e-07, -6.975598866209548e-07, -7.15218678984009e-07, -7.333245047663357e-07, -7.518886806196172e-07, -7.709228096752096e-07, -7.90438788796269e-07, -8.104488160134647e-07, -8.309653981489094e-07, -8.520013586330951e-07, -8.735698455196985e-07, -8.956843397032774e-07, -9.183586633449926e-07, -9.416069885116069e-07, -9.654438460331852e-07, -9.898841345850072e-07, -1.014943129999386e-06, -1.040636494813205e-06, -1.066980288057123e-06, -1.09399097529261e-06, -1.121685438903026e-06, -1.150080988645234e-06, -1.179195372468301e-06, -1.209046787606047e-06, -1.239653891950407e-06, -1.271035815712686e-06, -1.303212173379995e-06, -1.336203075974363e-06, -1.370029143622144e-06, -1.404711518441619e-06, -1.440271877756823e-06, -1.47673244764585e-06, -1.514116016832121e-06, -1.552445950927272e-06, -1.591746207034583e-06, -1.63204134872207e-06, -1.673356561374575e-06, -1.715717667934491e-06, -1.7591511450409e-06, -1.803684139577262e-06, -1.849344485637965e-06, -1.896160721924349e-06, -1.944162109581074e-06, -1.993378650483961e-06, -2.04384110599076e-06, -2.095581016166546e-06, -2.148630719495748e-06, -2.203023373093162e-06, -2.25879297342651e-06, -2.31597437756358e-06, -2.374603324957148e-06, -2.434716459781334e-06, 
-2.496351353833357e-06, -2.559546530014908e-06, -2.624341486408003e-06, -2.690776720960106e-06, -2.758893756794176e-06, -2.82873516815932e-06, -2.900344607038253e-06, -2.973766830428331e-06, -3.049047728313012e-06, -3.12623435234135e-06, -3.205374945233435e-06, -3.286518970930006e-06, -3.369717145505361e-06, -3.455021468862503e-06, -3.542485257230658e-06, -3.632163176485286e-06, -3.724111276311377e-06, -3.818387025231577e-06, -3.915049346520726e-06, -4.014158655029515e-06, -4.11577689494014e-06, -4.219967578477442e-06, -4.326795825600017e-06, -4.436328404695705e-06, -4.54863377430715e-06, -4.66378212591347e-06, -4.781845427794521e-06, -4.902897470005564e-06, -5.027013910489976e-06, -5.154272322359156e-06, -5.284752242369017e-06, -5.418535220623205e-06, -5.555704871534471e-06, -5.69634692607553e-06, -5.840549285352429e-06, -5.98840207553376e-06, -6.139997704169825e-06, -6.295430917937372e-06, -6.454798861845405e-06, -6.618201139939507e-06, -6.785739877542386e-06, -6.957519785069334e-06, -7.133648223458939e-06, -7.314235271259294e-06, -7.499393793412068e-06, -7.689239511777216e-06, -7.883891077442145e-06, -8.083470144861022e-06, -8.288101447869849e-06, -8.497912877625263e-06, -8.713035562515558e-06, -8.933603950093564e-06, -9.159755891083145e-06, -9.39163272551094e-06, -9.629379371017826e-06, -9.87314441340486e-06, -1.012308019947011e-05, -1.037934293219484e-05, -1.064209276833775e-05, -1.091149391849871e-05, -1.11877147497143e-05, -1.147092789064887e-05, -1.176131033944745e-05, -1.20590435743169e-05, -1.236431366690509e-05, -1.267731139854855e-05, -1.299823237946068e-05, -1.332727717093598e-05, -1.366465141064517e-05, -1.401056594110041e-05, -1.436523694137046e-05, -1.472888606212735e-05, -1.510174056411001e-05, -1.548403346008986e-05, -1.587600366042784e-05, -1.627789612231346e-05, -1.66899620027783e-05, -1.711245881558061e-05, -1.754565059205739e-05, -1.798980804604539e-05, -1.844520874297325e-05, -1.891213727323021e-05, -1.939088542991981e-05, 
-1.988175239110887e-05, -2.038504490668577e-05, -2.090107748994428e-05, -2.143017261401218e-05, -2.197266091324754e-05, -2.252888138972742e-05, -2.309918162495845e-05, -2.368391799694047e-05, -2.428345590271882e-05, -2.489816998656391e-05, -2.552844437391977e-05, -2.617467291126796e-05, -2.68372594120555e-05, -2.75166179088403e-05, -2.821317291181113e-05, -2.892735967384268e-05, -2.965962446225108e-05, -3.041042483741895e-05, -3.118022993846295e-05, -3.196952077612218e-05, -3.277879053304924e-05, -3.360854487169078e-05, -3.445930224994936e-05, -3.533159424482241e-05, -3.622596588422013e-05, -3.714297598716803e-05, -3.80831975126061e-05, -3.90472179170013e-05, -4.003563952099529e-05, -4.104907988531592e-05, -4.208817219618508e-05, -4.315356566046324e-05, -4.424592591077594e-05, -4.536593542087301e-05, -4.65142939314798e-05, -4.769171888690331e-05, -4.889894588266559e-05, -5.013672912444137e-05, -5.14058418985846e-05, -5.270707705453687e-05, -5.40412474994152e-05, -5.540918670508769e-05, -5.68117492280504e-05, -5.824981124242753e-05, -5.972427108642642e-05, -6.12360498225844e-05, -6.278609181215574e-05, -6.437536530399443e-05, -6.600486303829601e-05, -6.767560286557439e-05, -6.938862838125478e-05, -7.11450095762764e-05, -7.294584350410772e-05, -7.479225496458525e-05, -7.668539720500057e-05, -7.862645263886735e-05, -8.06166335828135e-05, -8.265718301205335e-05, -8.474937533490572e-05, -8.689451718683763e-05, -8.909394824452153e-05, -9.134904206041056e-05, -9.366120691834487e-05, -9.60318867107169e-05, -9.846256183773731e-05, -0.0001009547501293536, -0.0001035100077903912, -0.0001061299303694979, -0.0001088161537524875, -0.000111570355180695, -0.0001143942542949684, -0.0001172896142059392, -0.0001202582425912297, -0.0001233019928202692, -0.0001264227651074126, -0.0001296225076940649, -0.0001329032180605404, -0.0001362669441683965, -0.0001397157857340065, -0.0001432518955341454, -0.0001468774807443957, -0.000150594804311185, -0.0001544061863582977, 
-0.000158314005628721, -0.0001623207009627001, -0.0001664287728129126, -0.0001706407847976804, -0.0001749593652931669, -0.0001793872090655324, -0.0001839270789440354, -0.0001885818075361039, -0.0001933542989854151, -0.0001982475307740519, -0.0002032645555698338, -0.0002084085031199347, -0.0002136825821919448, -0.0002190900825635448, -0.0002246343770619998, -0.0002303189236547063, -0.0002361472675920501, -0.0002421230436038759, -0.0002482499781508875, -0.0002545318917323354, -0.0002609727012513857, -0.0002675764224395814, -0.0002743471723418652, -0.0002812891718636472, -0.0002884067483814441, -0.0002957043384186564, -0.0003031864903880801, -0.0003108578674027935, -0.0003187232501570956, -0.0003267875398792123, -0.0003350557613575294, -0.0003435330660421451, -0.0003522247352235936, -0.0003611361832906143, -0.000370272961068899, -0.0003796407592427955, -0.0003892454118619732, -0.0003990928999351345, -0.0004091893551128754, -0.0004195410634618616, -0.0004301544693325385, -0.000441036179322627, -0.0004521929663387401, -0.000463631773758474, -0.0004753597196954071, -0.0004873841013694854, -0.0004997123995853205, -0.0005123522833210092, -0.000525311614430113, -0.0005385984524595143, -0.0005522210595859255, -0.0005661879056738706, -0.0005805076734580546, -0.0005951892638530719, -0.0006102418013934797, -0.000625674639807334, -0.0006414973677263517, -0.000657719814535914, -0.0006743520563682338, -0.0006914044222420409, -0.0007088875003522354, -0.0007268121445130305, -0.0007451894807581649, -0.0007640309141018698, -0.0007833481354643231, -0.0008031531287654197, -0.0008234581781907675, -0.0008442758756338733, -0.00086561912831861, -0.0008875011666060961, -0.0009099355519902253, -0.0009329361852861678, -0.0009565173150162323, -0.000980693545997601, -0.001005479848136495, -0.001030891565433448, -0.001056944425204457, -0.001083654547522813, -0.001111038454886611, -0.001139113082116928, -0.001167895786491801, -0.001197404358121237, -0.001227657030568537, -0.001258672491723373, 
-0.001290469894932092, -0.001323068870390848, -0.001356489536807269, -0.001390752513336404, -0.001425878931796872, -0.001461890449173169, -0.001498809260410192, -0.00153665811150618, -0.001575460312910264, -0.001615239753231052, -0.001656020913262638, -0.001697828880334578, -0.001740689362992484, -0.001784628706015879, -0.001829673905780188, -0.001875852625969689, -0.001923193213648395, -0.001971724715695938, -0.002021476895615508, -0.002072480250721103, -0.002124766029711313, -0.00217836625063695, -0.002233313719269967, -0.002289642047881012, -0.002347385674433219, -0.002406579882199692, -0.002467260819812312, -0.002529465521749464, -0.002593231929270301, -0.002658598911803259, -0.002725606288796451, -0.002794294852037615, -0.002864706388451333, -0.002936883703381071, -0.003010870644363766, -0.003086712125404483, -0.00316445415175867, -0.003244143845229516, -0.00332582946998771, -0.003409560458920994, -0.00349538744052061, -0.003583362266311704, -0.003673538038834627, -0.003765969140183759, -0.003860711261110553, -0.003957821430696994, -0.004057358046605637, -0.0041593809059121, -0.004263951236525428, -0.00437113172920175, -0.004480986570155994, -0.00459358147427621, -0.004708983718944679, -0.004827262178469318, -0.004948487359128749, -0.005072731434833566, -0.005200068283405918, -0.005330573523478942, -0.005464324552016772, -0.005601400582455359, -0.00574188268346339, -0.005885853818321847, -0.006033398884919976, -0.006184604756364208, -0.006339560322196009, -0.006498356530213146, -0.006661086428887958, -0.006827845210375027, -0.006998730254099037, -0.007173841170912748, -0.00735327984781303, -0.007537150493201616, -0.007725559682675639, -0.007918616405330842, -0.008116432110559138, -0.008319120755319515, -0.008526798851859701, -0.00873958551586357, -0.008957602514996681, -0.00918097431782046, -0.009409828143042184, -0.009644294009065623, -0.009884504783804088, -0.01013059623471431, -0.0103827070790068, -0.01064097903398424, -0.0109055568674563, 
-0.01117658844817515, -0.01145422479623173, -0.01173862013334915, -0.01202993193300429, -0.01232832097030442, -0.01263395137154074, -0.01294699066333473, -0.01326760982128897, -0.01359598331804676, -0.01393228917066016, -0.01427670898715864, -0.01462942801220422, -0.01499063517171216, -0.01536052311630831, -0.01573928826348692, -0.01612713083832444, -0.01652425491259606, -0.01693086844213347, -0.01734718330225226, -0.01777341532106817, -0.01820978431051091, -0.01865651409483353, -0.01911383253640472, -0.01958197155855902, -0.0200611671652683, -0.02055165945738528, -0.02105369264519594, -0.02156751505700493, -0.02209337914346299, -0.0226315414773307, -0.02318226274835759, -0.02374580775293867, -0.02432244537819524, -0.02491244858010742, -0.02551609435530947, -0.02613366370613916, -0.02676544159851327, -0.0274117169121814, -0.02807278238288908, -0.0287489345359597, -0.02944047361078261, -0.03014770347567117, -0.03087093153253164, -0.03161046861075863, -0.03236662884974757, -0.03313972956938943, -0.03393009112788484, -0.03473803676618917, -0.03556389243837059, -0.03640798662713481, -0.03727065014374144, -0.03815221591150533, -0.03905301873204752, -0.03997339503342799, -0.04091368259926081, -0.04187422027788107, -0.04285534767059892, -0.04385740479804521, -0.04488073174357853, -0.04592566827269093, -0.04699255342731694, -0.0480817250939156, -0.0491935195441651, -0.05032827094707425, -0.05148631085128444, -0.05266796763630475, -0.05387356593138997, -0.05510342600074555, -0.05635786309371152, -0.05763718675855339, -0.05894170011846227, -0.0602716991083429, -0.06162747167095017, -0.06300929691091464, -0.06441744420518514, -0.06585217226840445, -0.06731372817172755, -0.06880234631358986, -0.07031824734093513, -0.07186163701942029, -0.07343270505112963, -0.07503162383834963, -0.07665854719198703, -0.07831360898324537, -0.07999692173722334, -0.08170857516715115, -0.08344863464804586, -0.08521713962864685, -0.08701410198057619, -0.08883950428377563, -0.09069329804738625, 
-0.09257540186536917, -0.09448569950631694, -0.09642403793706987, -0.09839022527993853, -0.1003840287035433, -0.1024051722475058, -0.1044533345814855, -0.1065281466993291, -0.1086291895494038, -0.1107559916025231, -0.1129080263592291, -0.1150847097985966, -0.1172853977711461, -0.1195093833389194, -0.1217558940662678, -0.1240240892654407, -0.126313057201646, -0.128621812262865, -0.1309492921003752, -0.1332943547466424, -0.1356557757179953, -0.1380322451103066, -0.1404223646967526, -0.1428246450376293, -0.1452375026131627, -0.1476592569912487, -0.150088128043133, -0.1525222332211403, -0.1549595849137373, -0.1573980878944259, -0.1598355368822321, -0.1622696142328719, -0.1646978877810379, -0.1671178088556532, -0.1695267104913885, -0.1719218058612091, -0.1743001869562373, -0.1766588235407353, -0.1789945624115641, -0.1813041269930239, -0.1835841172995185, -0.1858310103000267, -0.1880411607198456, -0.1902108023165335, -0.1923360496683622, -0.1944129005148921, -0.1964372386904969, -0.1984048376927275, -0.2003113649283415, -0.2021523866805631, -0.2039233738416721, -0.205619708455319, -0.2072366911129641, -0.208769549248539, -0.2102134463747648, -0.2115634923034834, -0.212814754390853, -0.2139622698462276, -0.2150010591409851, -0.215926140550388, -0.2167325458577292, -0.2174153372454719, -0.2179696253927544, -0.2183905887924832, -0.2186734942941662, -0.2188137188706419, -0.2188067725978309, -0.2186483228265568, -0.2183342195142858, -0.2178605216722747, -0.2172235248700634, -0.2164197897244677, -0.2154461712841969, -0.2142998492039455, -0.2129783585832774, -0.2114796213258897, -0.209801977853919, -0.2079442189899524, -0.2059056177963868, -0.2036859611378825, -0.2012855807080694, -0.1987053832365263, -0.1959468795666734, -0.1930122122698313, -0.1899041814356538, -0.1866262682548572, -0.183182655987018, -0.1795782478847855, -0.1758186816266449, -0.1719103397940314, -0.167860355915864, -0.1636766155950953, -0.159367752228626, -0.1549431368346531, -0.1504128615112273, 
-0.1457877160674777, -0.1410791573955525, -0.1362992711880059, -0.1314607256530556, -0.1265767169399584, -0.1216609060597189, -0.116727347173302, -0.1117904072214336, -0.1068646769875081, -0.1019648738186591, -0.09710573637995733, -0.09230191198286142, -0.0875678372111923, -0.08291761276507577, -0.07836487365423979, -0.07392265609483636, -0.0696032626959096, -0.06541812775951666, -0.06137768475801162, -0.05749123828808476, -0.05376684302767916, -0.05021119243163852, -0.04682952008668043, -0.04362551679647195, -0.04060126657267232, -0.03775720475606618, -0.03509210147048401, -0.03260307350746537, -0.03028562753696739, -0.0281337372239601, -0.02613995638724413, -0.02429556975037933, -0.02259078209107641, -0.02101494568202947, -0.01955682482236444, -0.01820489497717546, -0.01694767256947437, -0.01577406980610987, -0.0146737670749711, -0.01363759344113791, -0.01265790361992299, -0.01172893755106189, -0.01084686756366826, -0.01000850151591731, -0.009210964996905324, -0.008451810802649007, -0.007729015453815491, -0.007040967895433958, -0.00638645035289187, -0.005764611448509649, -0.005174931823596352, -0.004617182661060482, -0.004091377662741091, -0.003597719201950844, -0.003136539544148387, -0.002708238205616942, -0.002313216699528109, -0.001951812099390049, -0.001624231029255047, -0.00133048586640952, -0.001070335113209114, -0.0008432300604407361, -0.0006482708051518682, -0.000484174571984991, -0.0003492536385459044, -0.0002414067300084699, -0.0001581362268995024, -9.658391753804285e-05, -5.359222695108801e-05, -2.579275409556742e-05, -9.725073038369114e-06, -1.988510134977306e-06, 5.705212224819262e-07, 6.33262632752235e-07, 1.43365395201267e-07, -2.081668171172169e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 1 + }, + { + "i": 0, + "radial_function": [ + -1.045942238778372e-12, -1.085909784665608e-12, -1.127404570564809e-12, -1.170484955383953e-12, -1.215211528042554e-12, -1.261647192684902e-12, -1.309857257149521e-12, -1.359909524819202e-12, -1.411874389980807e-12, -1.465824936829009e-12, -1.521837042253104e-12, -1.579989482551572e-12, -1.640364044224402e-12, -1.703045638999004e-12, -1.768122423251558e-12, -1.835685921991601e-12, -1.905831157584406e-12, -1.978656783392086e-12, -2.054265222521371e-12, -2.132762811873278e-12, -2.214259951697166e-12, 
-2.29887126085959e-12, -2.386715738046302e-12, -2.477916929124084e-12, -2.572603100897908e-12, -2.670907421507579e-12, -2.77296814771786e-12, -2.87892881936524e-12, -2.988938461234958e-12, -3.103151792652193e-12, -3.221729445082108e-12, -3.344838188044913e-12, -3.472651163663588e-12, -3.605348130174143e-12, -3.743115714741028e-12, -3.886147675932947e-12, -4.034645176228657e-12, -4.188817064935618e-12, -4.348880171919657e-12, -4.515059612558657e-12, -4.687589104349091e-12, -4.866711295610884e-12, -5.052678106752705e-12, -5.245751084577758e-12, -5.446201770128404e-12, -5.654312080586775e-12, -5.870374705768793e-12, -6.094693519768972e-12, -6.327584008335095e-12, -6.569373712573804e-12, -6.820402689611062e-12, -7.081023990855579e-12, -7.351604158537605e-12, -7.632523741221573e-12, -7.92417782901775e-12, -8.226976609245155e-12, -8.541345943327936e-12, -8.867727965736015e-12, -9.206581705812612e-12, -9.55838373336321e-12, -9.923628828913802e-12, -1.030283067958128e-11, -1.069652260153456e-11, -1.110525829006242e-11, -1.152961259830342e-11, -1.197018234573243e-11, -1.242758715754146e-11, -1.29024703360952e-11, -1.339549976568652e-11, -1.39073688518652e-11, -1.443879749665998e-11, -1.499053311106647e-11, -1.556335166622443e-11, -1.615805878476279e-11, -1.677549087384774e-11, -1.741651630152688e-11, -1.808203661802432e-11, -1.877298782370448e-11, -1.949034168548747e-11, -2.023510710356778e-11, -2.100833153035885e-11, -2.181110244365816e-11, -2.264454887610625e-11, -2.350984300308927e-11, -2.440820179131964e-11, -2.53408887104123e-11, -2.630921550986508e-11, -2.731454406394109e-11, -2.835828828704909e-11, -2.944191612231487e-11, -3.056695160614069e-11, -3.173497701165702e-11, -3.294763507408033e-11, -3.420663130110767e-11, -3.551373637159725e-11, -3.687078862590795e-11, -3.827969665140171e-11, -3.974244196674394e-11, -4.126108180877825e-11, -4.283775202589463e-11, -4.447467008196024e-11, -4.617413817503884e-11, -4.793854647528403e-11, -4.977037648656096e-11, 
-5.167220453652482e-11, -5.364670540006366e-11, -5.569665606120313e-11, -5.782493961876341e-11, -6.003454934126198e-11, -6.232859287676561e-11, -6.471029662361053e-11, -6.718301026814183e-11, -6.975021149585163e-11, -7.241551088254223e-11, -7.518265697239587e-11, -7.805554155009017e-11, -8.103820511437641e-11, -8.413484256082071e-11, -8.734980908169616e-11, -9.06876262913299e-11, -9.415298858551478e-11, -9.775076974393594e-11, -1.014860297848927e-10, -1.053640220819626e-10, -1.093902007526133e-10, -1.135702283291553e-10, -1.179099837228274e-10, -1.224155704922125e-10, -1.270933254276167e-10, -1.319498274634852e-10, -1.369919069313878e-10, -1.422266551665924e-10, -1.476614344817379e-10, -1.533038885216313e-10, -1.591619530137399e-10, -1.652438669294895e-10, -1.715581840720807e-10, -1.7811378510711e-10, -1.849198900529258e-10, -1.919860712482837e-10, -1.99322266815539e-10, -2.069387946383205e-10, -2.148463668733378e-10, -2.230561050167401e-10, -2.315795555462156e-10, -2.404287061608323e-10, -2.496160026414674e-10, -2.591543663555356e-10, -2.690572124306431e-10, -2.793384686227263e-10, -2.900125949052176e-10, -3.010946038067918e-10, -3.126000815263008e-10, -3.245452098545957e-10, -3.36946788934079e-10, -3.498222608879843e-10, -3.631897343526424e-10, -3.77068009947228e-10, -3.914766067168155e-10, -4.064357895859497e-10, -4.219665978613291e-10, -4.380908748237185e-10, -4.548312984507024e-10, -4.722114133134978e-10, -4.902556636927076e-10, -5.089894279595628e-10, -5.284390542710805e-10, -5.486318976292728e-10, -5.695963583566011e-10, -5.913619220417777e-10, -6.139592010120794e-10, -6.374199773905793e-10, -6.617772477987921e-10, -6.870652697676683e-10, -7.1331960992223e-10, -7.405771940075623e-10, -7.688763588266623e-10, -7.982569061630636e-10, -8.287601587642094e-10, -8.604290184642958e-10, -8.933080265282957e-10, -9.274434263021813e-10, -9.628832282573401e-10, -9.996772775208085e-10, -1.03787732398627e-09, -1.077537095104434e-09, -1.118712371455295e-09, 
-1.161461065208477e-09, -1.205843301582126e-09, -1.251921503414961e-09, -1.29976047897037e-09, -1.349427513096265e-09, -1.400992461868744e-09, -1.454527850852895e-09, -1.510108977118906e-09, -1.567814015156986e-09, -1.627724126840265e-09, -1.689923575590187e-09, -1.754499844905202e-09, -1.821543761419491e-09, -1.891149622664751e-09, -1.963415329715064e-09, -2.038442524901213e-09, -2.116336734788443e-09, -2.197207518618756e-09, -2.281168622426574e-09, -2.368338139044841e-09, -2.458838674226426e-09, -2.552797519114863e-09, -2.650346829307018e-09, -2.751623810759565e-09, -2.856770912801202e-09, -2.965936028521848e-09, -3.079272702821108e-09, -3.196940348408771e-09, -3.319104470061085e-09, -3.445936897448884e-09, -3.57761602686474e-09, -3.714327072189725e-09, -3.856262325452937e-09, -4.003621427350266e-09, -4.156611648103665e-09, -4.315448179055694e-09, -4.480354435410174e-09, -4.651562370544967e-09, -4.829312802339161e-09, -5.013855751974434e-09, -5.205450795686965e-09, -5.404367429965557e-09, -5.61088545070988e-09, -5.825295346882437e-09, -6.047898709208955e-09, -6.27900865450194e-09, -6.518950266205367e-09, -6.768061051780516e-09, -7.026691417576761e-09, -7.295205161856556e-09, -7.573979986667995e-09, -7.863408029286402e-09, -8.163896413973052e-09, -8.475867824827744e-09, -8.799761100542598e-09, -9.136031851893844e-09, -9.485153102841888e-09, -9.847615956142268e-09, -1.022393028440498e-08, -1.061462544757558e-08, -1.102025103784864e-08, -1.144137765306277e-08, -1.187859769966681e-08, -1.233252622638806e-08, -1.280380178977725e-08, -1.329308735284948e-08, -1.38010712180872e-08, -1.432846799611994e-08, -1.487601961144539e-08, -1.544449634660925e-08, -1.603469792631482e-08, -1.664745464299073e-08, -1.728362852540233e-08, -1.794411455195401e-08, -1.862984191039279e-08, -1.934177530568803e-08, -2.00809163179316e-08, -2.084830481217214e-08, -2.164502040217088e-08, -2.247218397014321e-08, -2.333095924462789e-08, -2.422255443870928e-08, -2.514822395090259e-08, 
-2.610927013109997e-08, -2.71070451140692e-08, -2.814295272308952e-08, -2.92184504464103e-08, -3.033505148932095e-08, -3.149432690472536e-08, -3.269790780522869e-08, -3.394748765985546e-08, -3.524482467864099e-08, -3.659174428846104e-08, -3.799014170359262e-08, -3.944198459463525e-08, -4.094931585955888e-08, -4.25142565007904e-08, -4.413900861240095e-08, -4.582585848161023e-08, -4.757717980898931e-08, -4.939543705190649e-08, -5.128318889594151e-08, -5.324309185916817e-08, -5.527790403439905e-08, -5.739048897467815e-08, -5.95838197275112e-08, -6.18609830235348e-08, -6.422518362554488e-08, -6.667974884402828e-08, -6.922813322558586e-08, -7.187392342086903e-08, -7.462084323891962e-08, -7.74727588950552e-08, -8.043368445972492e-08, -8.350778751604315e-08, -8.66993950340048e-08, -9.001299946969435e-08, -9.345326509811716e-08, -9.702503458866207e-08, -1.00733335832282e-07, -1.045833890305278e-07, -1.08580614056087e-07, -1.127306380950335e-07, -1.170393035824287e-07, -1.21512676441627e-07, -1.261570546397563e-07, -1.309789770704821e-07, -1.359852327779055e-07, -1.411828705336823e-07, -1.465792087813768e-07, -1.521818459619938e-07, -1.579986712357123e-07, -1.640378756141727e-07, -1.70307963520344e-07, -1.768177647914468e-07, -1.835764471418064e-07, -1.905935291050161e-07, -1.978788934708412e-07, -2.054428012395733e-07, -2.13295906110243e-07, -2.214492695256354e-07, -2.29914376294583e-07, -2.387031508140176e-07, -2.478279739133011e-07, -2.573017003458728e-07, -2.671376769516669e-07, -2.773497615180764e-07, -2.879523423644902e-07, -2.989603586793183e-07, -3.103893216380126e-07, -3.222553363334619e-07, -3.345751245475103e-07, -3.473660483992244e-07, -3.606461349006227e-07, -3.744341014580931e-07, -3.887493823523583e-07, -4.036121562383269e-07, -4.190433747019085e-07, -4.350647919154041e-07, -4.51698995433593e-07, -4.689694381750465e-07, -4.869004716348642e-07, -5.055173803745264e-07, -5.248464178408987e-07, -5.449148435637854e-07, -5.657509617863854e-07, 
-5.873841615840289e-07, -6.098449585280133e-07, -6.331650379562367e-07, -6.573772999102578e-07, -6.825159058064906e-07, -7.086163269054511e-07, -7.357153946521988e-07, -7.638513529575366e-07, -7.93063912497615e-07, -8.233943071093062e-07, -8.548853523617831e-07, -8.87581506392366e-07, -9.2152893308986e-07, -9.567755677205676e-07, -9.93371185090589e-07, -1.031367470343385e-06, -1.070818092493646e-06, -1.11177878080843e-06, -1.154307404140866e-06, -1.198464053336695e-06, -1.244311126830007e-06, -1.291913419554875e-06, -1.341338215302183e-06, -1.392655382653105e-06, -1.445937474637321e-06, -1.501259832250969e-06, -1.558700691990123e-06, -1.618341297566395e-06, -1.680266015951247e-06, -1.744562457926869e-06, -1.811321603332588e-06, -1.880637931163929e-06, -1.952609554746318e-06, -2.02733836215863e-06, -2.104930162121386e-06, -2.185494835571616e-06, -2.269146493141329e-06, -2.356003638769605e-06, -2.446189339695666e-06, -2.539831403090178e-06, -2.637062559572911e-06, -2.738020653898561e-06, -2.84284884309924e-06, -2.95169580236321e-06, -3.06471593897468e-06, -3.182069614622073e-06, -3.303923376412912e-06, -3.430450196938745e-06, -3.561829723753491e-06, -3.698248538634774e-06, -3.839900427028642e-06, -3.98698665806799e-06, -4.139716275604899e-06, -4.298306400680901e-06, -4.462982545910627e-06, -4.633978942237348e-06, -4.811538878565546e-06, -4.995915054799994e-06, -5.187369948805004e-06, -5.386176197862816e-06, -5.59261699520769e-06, -5.806986502250609e-06, -6.029590277118191e-06, -6.260745720176117e-06, -6.50078253721313e-06, -6.750043221010988e-06, -7.008883552049715e-06, -7.27767311910798e-06, -7.556795860588359e-06, -7.846650627398023e-06, -8.147651768281244e-06, -8.460229738490263e-06, -8.784831732786128e-06, -9.121922343740649e-06, -9.47198424639178e-06, -9.835518910329077e-06, -1.02130473403267e-05, -1.060511084673695e-05, -1.101227184682756e-05, -1.143511469837973e-05, -1.187424656685424e-05, -1.233029832754217e-05, -1.280392550413906e-05, -1.329580924525789e-05, 
-1.380665734047966e-05, -1.433720527756269e-05, -1.488821734256862e-05, -1.546048776467368e-05, -1.605484190755779e-05, -1.667213750932327e-05, -1.731326597299962e-05, -1.797915370973735e-05, -1.867076353697824e-05, -1.93890961338685e-05, -2.013519155638318e-05, -2.091013081469371e-05, -2.171503751542365e-05, -2.255107957158097e-05, -2.341947098303233e-05, -2.432147369059558e-05, -2.525839950683759e-05, -2.623161212697221e-05, -2.724252922322528e-05, -2.829262462634603e-05, -2.938343059799111e-05, -3.051654019795886e-05, -3.169360975038297e-05, -3.291636141320073e-05, -3.418658585543398e-05, -3.550614504697319e-05, -3.68769751658e-05, -3.830108962786288e-05, -3.978058224494856e-05, -4.13176305162618e-05, -4.291449905958825e-05, -4.457354318827239e-05, -4.629721264047147e-05, -4.808805546749344e-05, -4.99487220883168e-05, -5.188196951773912e-05, -5.389066577594498e-05, -5.597779448766673e-05, -5.814645967947772e-05, -6.039989078417185e-05, -6.274144786162366e-05, -6.517462704594285e-05, -6.770306622923457e-05, -7.03305509927599e-05, -7.306102079677378e-05, -7.58985754409353e-05, -7.884748180769034e-05, -8.191218090166706e-05, -8.509729519870851e-05, -8.840763631885938e-05, -9.184821303837121e-05, -9.542423965636359e-05, -9.914114473274325e-05, -0.0001030045802146052, -0.0001070204309693307, -0.0001111948247433804, -0.0001155341425667478, -0.0001200450296240707, -0.0001247344066143266, -0.0001296094816221799, -0.0001346777625251769, -0.0001399470699621704, -0.0001454255508896113, -0.0001511216927536061, -0.0001570443383071394, -0.0001632027011031314, -0.0001696063816957506, -0.0001762653845837133, -0.0001831901359312685, -0.0001903915021041193, -0.0001978808090594411, -0.0002056698626311197, -0.0002137709697533994, -0.0002221969606680658, -0.0002309612121628344, -0.0002400776718907057, -0.0002495608838226136, -0.0002594260148882973, -0.0002696888828629641, -0.0002803659855602471, -0.0002914745313947764, -0.0003030324713809852, -0.0003150585326379205, 
-0.0003275722534732364, -0.0003405940201232029, -0.0003541451052292521, -0.0003682477081354943, -0.000382924997095817, -0.0003982011534833408, -0.0004141014180995495, -0.0004306521396852282, -0.0004478808257398919, -0.0004658161957618798, -0.0004844882370263619, -0.0005039282630241447, -0.0005241689746899737, -0.0005452445245550466, -0.0005671905839646841, -0.0005900444135087904, -0.0006138449368193904, -0.000638632817896598, -0.0006644505421319776, -0.0006913425012054254, -0.0007193550820400443, -0.0007485367600074361, -0.0007789381965842392, -0.0008106123416698119, -0.0008436145407836805, -0.0008780026473707806, -0.0009138371404523607, -0.0009511812478699467, -0.0009901010753802443, -0.001030665741869039, -0.001072947520963005, -0.001117021989329113, -0.001162968181962467, -0.001210868754774807, -0.001260810154807249, -0.001312882798402525, -0.001367181257683814, -0.00142380445569871, -0.001482855870599167, -0.001544443749239495, -0.001608681330586597, -0.001675687079347879, -0.001745584930233568, -0.001818504543281206, -0.001894581570680573, -0.001973957935547135, -0.002056782123101765, -0.002143209484722614, -0.002233402555343115, -0.002327531384676275, -0.002425773882750731, -0.00252831618024819, -0.002635353004133304, -0.002747088069068062, -0.002863734485099867, -0.00298551518210886, -0.003112663351492019, -0.003245422905551623, -0.00338404895504158, -0.003528808305307287, -0.003679979971432412, -0.003837855712779277, -0.004002740587276532, -0.004174953525770209, -0.004354827926708965, -0.004542712271382337, -0.004738970759871261, -0.004943983967801218, -0.005158149523910691, -0.005381882808359308, -0.005615617671600265, -0.0058598071735304, -0.006114924342505526, -0.006381462953669993, -0.006659938325893624, -0.006950888136437544, -0.007254873252280512, -0.007572478576826674, -0.007904313910485765, -0.008251014823362386, -0.008613243538013378, -0.008991689819928338, -0.009387071873056258, -0.009800137237340549, -0.01023166368483114, -0.01068246011051642, 
-0.011153367413556, -0.01164525936409557, -0.01215904345030734, -0.01269566169971825, -0.01325609146826489, -0.01384134618984513, -0.01445247607841788, -0.01509056877393914, -0.01575674992260224, -0.01645218368098284, -0.01717807313276426, -0.01793566060574024, -0.01872622787575862, -0.01955109624317732, -0.02041162646625837, -0.02130921853472245, -0.02224531126542902, -0.02322138170083844, -0.02423894428955137, -0.02529954982681465, -0.02640478413143491, -0.02755626643405486, -0.0287556474502358, -0.03000460711025314, -0.03130485191596741, -0.0326581118935906, -0.03406613710963771, -0.0355306937158603, -0.03705355948751055, -0.03863651881791218, -0.04028135713103836, -0.04198985467263971, -0.0437637796394718, -0.04560488060535749, -0.04751487820223885, -0.04949545601406404, -0.05154825064135301, -0.05367484089467198, -0.05587673607603213, -0.0581553633085253, -0.06051205387634772, -0.06294802853981876, -0.06546438179318577, -0.06806206503693875, -0.07074186864119576, -0.07350440288250436, -0.07635007774324104, -0.07927908157081544, -0.08229135860313855, -0.0853865853774643, -0.08856414605183259, -0.09182310668202268, -0.09516218851233688, -0.09857974035567878, -0.1020737101574626, -0.1056416158589093, -0.1092805156983269, -0.1129869781141571, -0.1167570514408285, -0.1205862336179124, -0.1244694421646336, -0.1284009847054079, -0.132374530367716, -0.1363830824110366, -0.1404189524846521, -0.1444737369525701, -0.1485382957652364, -0.1526027343997888, -0.1566563894326876, -0.1606878183501518, -0.1646847942421234, -0.1686343060636119, -0.1725225651823398, -0.1763350189623425, -0.1800563721584376, -0.1836706169147415, -0.1871610721700941, -0.1905104332726604, -0.1937008325930912, -0.1967139118984803, -0.199530907205666, -0.2021327467698622, -0.2045001627807666, -0.2066138172305862, -0.2084544422843659, -0.2100029953200226, -0.2112408286111328, -0.2121498733975253, -0.2127128378249977, -0.2129134179343547, -0.2127365205401895, -0.2121684964608093, -0.2111973821425752, 
-0.2098131472657637, -0.2080079454270893, -0.205776364469751, -0.2031156724804026, -0.2000260549005686, -0.1965108376166326, -0.1925766903085865, -0.1882338037667527, -0.1834960343437939, -0.1783810082153018, -0.172910177698023, -0.1671088215449604, -0.1610059809289284, -0.1546343227711802, -0.148029922202363, -0.1412319562947339, -0.1342823018138661, -0.1272250306427436, -0.1201057977685955, -0.1129711183288885, -0.1058675322207571, -0.09884065721574854, -0.09193413441041833, -0.08518847319485104, -0.07863980673517366, -0.07231857322739199, -0.0662481428545584, -0.0604434154122159, -0.05490941887803135, -0.04963994468200254, -0.04462312478939501, -0.0398650179511586, -0.03537566571421369, -0.0311631303041433, -0.02723341812420133, -0.02359042058479854, -0.02023587390334269, -0.01716933940971965, -0.01438820575862765, -0.01188771428602797, -0.009661008552857675, -0.007699208895738718, -0.005991512552868228, -0.004525319655577897, -0.0032863850751641, -0.002258995793832042, -0.001426173132417774, -0.0007698988207786028, -0.0002713635450748431, 8.876374772659457e-05, 0.0003300506359534827, 0.0004720055876357154, 0.0005337569088522676, 0.0005337090979068426, 0.0004892151985443904, 0.0004162344816732721, 0.0003289930919097017, 0.0002396480986152083, 0.0001579595952944102, 9.097508865890669e-05, 4.273071656374361e-05, 1.397404566577665e-05, 1.913516735729814e-06, -5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 2 + }, + { + "i": 0, + "radial_function": [ + -9.651637099685155e-13, -1.002044544943851e-12, -1.0403346704135e-12, -1.080087938190476e-12, -1.121360257873693e-12, -1.164209675480081e-12, -1.208696455081451e-12, -1.254883163560827e-12, -1.302834758607507e-12, -1.352618680074606e-12, -1.404304944827523e-12, -1.457966245216792e-12, -1.513678051313761e-12, -1.571518717052915e-12, -1.631569590430125e-12, -1.693915127911751e-12, -1.758643013215601e-12, -1.825844280630724e-12, -1.895613443049508e-12, -1.96804862489217e-12, -2.043251700110499e-12, -2.121328435465088e-12, -2.202388639277397e-12, -2.286546315866012e-12, -2.373919825884187e-12, -2.464632052784193e-12, -2.558810575642671e-12, -2.656587848589939e-12, -2.758101387095684e-12, -2.863493961373031e-12, -2.972913797172934e-12, -3.086514784251355e-12, -3.204456692802428e-12, 
-3.326905398161922e-12, -3.454033114097176e-12, -3.586018635011383e-12, -3.723047587403145e-12, -3.865312690934734e-12, -4.013014029476311e-12, -4.166359332507366e-12, -4.325564267270984e-12, -4.490852742092035e-12, -4.662457221285716e-12, -4.840619052099417e-12, -5.025588804147768e-12, -5.217626621818061e-12, -5.417002590141975e-12, -5.623997114647929e-12, -5.838901315728424e-12, -6.062017438076973e-12, -6.293659275770435e-12, -6.534152613594675e-12, -6.78383568523421e-12, -7.043059648970176e-12, -7.312189081555842e-12, -7.591602490963962e-12, -7.881692848727411e-12, -8.182868142621652e-12, -8.495551950466332e-12, -8.820184035853156e-12, -9.15722096663754e-12, -9.507136757064422e-12, -9.870423534430888e-12, -1.02475922312234e-11, -1.063917330370322e-11, -1.104571747795019e-11, -1.146779652441482e-11, -1.190600406206736e-11, -1.236095639327521e-11, -1.283329337058302e-11, -1.332367929661385e-11, -1.383280385835792e-11, -1.436138309716231e-11, -1.49101604157861e-11, -1.547990762393745e-11, -1.607142602376252e-11, -1.668554753681362e-11, -1.732313587408112e-11, -1.798508775073491e-11, -1.867233414728374e-11, -1.93858416189261e-11, -2.012661365493444e-11, -2.089569208998439e-11, -2.169415856941389e-11, -2.252313607047341e-11, -2.3383790481706e-11, -2.427733224267929e-11, -2.520501804637544e-11, -2.616815260663269e-11, -2.716809049312562e-11, -2.820623803646293e-11, -2.928405530608453e-11, -3.040305816373777e-11, -3.156482039542173e-11, -3.277097592479847e-11, -3.402322111118274e-11, -3.532331713534426e-11, -3.6673092476476e-11, -3.807444548381378e-11, -3.952934704652319e-11, -4.103984336560866e-11, -4.260805883174417e-11, -4.423619901307228e-11, -4.592655375717386e-11, -4.7681500411572e-11, -4.950350716729754e-11, -5.139513653022173e-11, -5.335904892503575e-11, -5.539800643694698e-11, -5.75148766963555e-11, -5.971263691197102e-11, -6.199437805804752e-11, -6.436330922162082e-11, -6.682276211586461e-11, -6.937619576591422e-11, -7.202720137374497e-11, 
-7.477950736895132e-11, -7.76369846525285e-11, -8.06036520410312e-11, -8.368368191876838e-11, -8.688140610598166e-11, -9.020132195126209e-11, -9.36480986567735e-11, -9.722658384517911e-11, -1.009418103775075e-10, -1.04799003431545e-10, -1.088035878507145e-10, -1.129611957737721e-10, -1.172776745560554e-10, -1.217590949934249e-10, -1.264117598604599e-10, -1.31242212774927e-10, -1.362572474009795e-10, -1.414639170040391e-10, -1.46869544370795e-10, -1.524817321082709e-10, -1.583083733364506e-10, -1.643576627894993e-10, -1.706381083411901e-10, -1.771585429707545e-10, -1.839281371859767e-10, -1.909564119210144e-10, -1.982532519270806e-10, -2.058289196748232e-10, -2.136940697879549e-10, -2.218597640284349e-10, -2.303374868542783e-10, -2.391391615718789e-10, -2.482771671055575e-10, -2.577643554079317e-10, -2.676140695355816e-10, -2.778401624154508e-10, -2.884570163283659e-10, -2.994795631370855e-10, -3.109233052873283e-10, -3.228043376113137e-10, -3.351393699644937e-10, -3.479457507273044e-10, -3.612414912049976e-10, -3.750452909598803e-10, -3.89376564111568e-10, -4.042554666422814e-10, -4.197029247455565e-10, -4.357406642582696e-10, -4.523912412173636e-10, -4.69678073584241e-10, -4.876254741814905e-10, -5.062586848882139e-10, -5.256039121421081e-10, -5.456883637982091e-10, -5.665402873961288e-10, -5.88189009889672e-10, -6.106649788946298e-10, -6.339998055128459e-10, -6.582263087927565e-10, -6.833785618889215e-10, -7.094919399855471e-10, -7.366031700513181e-10, -7.647503824955976e-10, -7.939731647986326e-10, -8.243126171911702e-10, -8.558114104618874e-10, -8.885138459738426e-10, -9.224659179744624e-10, -9.577153782866777e-10, -9.943118034721642e-10, -1.032306664561275e-09, -1.07175339944762e-09, -1.112707488049222e-09, -1.155226530341968e-09, -1.19937032737504e-09, -1.245200965382452e-09, -1.292782903108816e-09, -1.34218306247236e-09, -1.393470922692627e-09, -1.446718618015272e-09, -1.50200103917153e-09, -1.559395938714926e-09, -1.618984040383543e-09, 
-1.680849152641672e-09, -1.745078286560446e-09, -1.811761778203549e-09, -1.880993415689846e-09, -1.952870571111981e-09, -2.02749433749637e-09, -2.104969670997232e-09, -2.1854055385249e-09, -2.26891507101585e-09, -2.355615722560266e-09, -2.445629435610968e-09, -2.539082812506e-09, -2.636107293546467e-09, -2.736839341879857e-09, -2.841420635449155e-09, -2.94999826627781e-09, -3.062724947370725e-09, -3.179759227522773e-09, -3.301265714336564e-09, -3.427415305763679e-09, -3.558385430494952e-09, -3.694360297537939e-09, -3.835531155333068e-09, -3.982096560772611e-09, -4.134262658501395e-09, -4.29224347089208e-09, -4.45626119910288e-09, -4.626546535641787e-09, -4.803338988876554e-09, -4.986887219947508e-09, -5.177449392557153e-09, -5.375293536128554e-09, -5.580697922844059e-09, -5.793951459094309e-09, -6.015354091888895e-09, -6.24521723080042e-09, -6.483864186035499e-09, -6.731630623249873e-09, -6.988865035746847e-09, -7.255929234724314e-09, -7.533198858260012e-09, -7.821063899751204e-09, -8.119929256553132e-09, -8.430215299587534e-09, -8.752358464723736e-09, -9.086811866764343e-09, -9.434045936899563e-09, -9.79454908452791e-09, -1.01688283843743e-08, -1.055741028987317e-08, -1.096084137382069e-08, -1.137968909733838e-08, -1.181454260823127e-08, -1.226601356986369e-08, -1.273473702172021e-08, -1.322137227286291e-08, -1.372660382954292e-08, -1.42511423582723e-08, -1.479572568571174e-08, -1.536111983678283e-08, -1.594812011246541e-08, -1.655755220879839e-08, -1.719027337865958e-08, -1.784717363795982e-08, -1.852917701795113e-08, -1.923724286541108e-08, -1.997236719253536e-08, -2.073558407843908e-08, -2.152796712424081e-08, -2.235063096377871e-08, -2.32047328320864e-08, -2.409147419383773e-08, -2.501210243405486e-08, -2.596791261345974e-08, -2.696024929094337e-08, -2.799050841571934e-08, -2.906013929182686e-08, -3.017064661775208e-08, -3.132359260404024e-08, -3.252059917188299e-08, -3.376335023577863e-08, -3.505359407348112e-08, -3.639314578657881e-08, 
-3.778388985516878e-08, -3.922778279022843e-08, -4.072685588742229e-08, -4.228321808622414e-08, -4.389905893838696e-08, -4.557665168994173e-08, -4.73183564810731e-08, -4.912662366838115e-08, -5.100399727421239e-08, -5.29531185679263e-08, -5.497672978414356e-08, -5.707767798322132e-08, -5.925891905940032e-08, -6.152352190227298e-08, -6.387467271744678e-08, -6.63156795124927e-08, -6.884997675450986e-08, -7.148113020587533e-08, -7.421284194500174e-08, -7.704895557918701e-08, -7.999346165691123e-08, -8.305050328721908e-08, -8.622438197414035e-08, -8.951956367424691e-08, -9.294068508624183e-08, -9.649256018100528e-08, -1.001801869816145e-07, -1.040087546028586e-07, -1.079836505602265e-07, -1.121104683583653e-07, -1.163950153704259e-07, -1.208433210185144e-07, -1.254616452676679e-07, -1.302564874447101e-07, -1.352345953948474e-07, -1.404029749888649e-07, -1.457688999939396e-07, -1.513399223228344e-07, -1.57123882675314e-07, -1.63128921586928e-07, -1.693634909010154e-07, -1.75836365680044e-07, -1.825566565727901e-07, -1.895338226558629e-07, -1.967776847660561e-07, -2.04298439344457e-07, -2.121066728098959e-07, -2.202133764838114e-07, -2.286299620861821e-07, -2.373682778255855e-07, -2.464406251058273e-07, -2.558597758721447e-07, -2.656389906228867e-07, -2.757920371106956e-07, -2.863332097611346e-07, -2.972773498349839e-07, -3.086398663637775e-07, -3.204367578878555e-07, -3.326846350276509e-07, -3.454007439201083e-07, -3.586029905546717e-07, -3.723099660412979e-07, -3.865409728486632e-07, -4.013160520475409e-07, -4.166560116001333e-07, -4.325824557342272e-07, -4.49117815444158e-07, -4.662853801624118e-07, -4.841093306460698e-07, -5.026147731256743e-07, -5.218277747648866e-07, -5.417754004809069e-07, -5.624857511784154e-07, -5.839880034516896e-07, -6.063124508113453e-07, -6.294905464937112e-07, -6.535549479149856e-07, -6.785395628326632e-07, -7.04479597280768e-07, -7.314116053463454e-07, -7.593735408591862e-07, -7.884048110678949e-07, -8.185463323794852e-07, 
-8.498405882416191e-07, -8.823316892506524e-07, -9.160654355708539e-07, -9.51089381754812e-07, -9.874529040571984e-07, -1.025207270338788e-06, -1.064405712659281e-06, -1.105103502666097e-06, -1.147358029883315e-06, -1.191228883016361e-06, -1.236777934386478e-06, -1.284069427618194e-06, -1.333170068705034e-06, -1.384149120582508e-06, -1.437078501348501e-06, -1.492032886268742e-06, -1.549089813713576e-06, -1.608329795181072e-06, -1.669836429569254e-06, -1.733696521845593e-06, -1.800000206309475e-06, -1.868841074603057e-06, -1.940316308668976e-06, -2.014526818841563e-06, -2.091577387273937e-06, -2.171576816906248e-06, -2.254638086189593e-06, -2.340878509797613e-06, -2.430419905547693e-06, -2.523388767780518e-06, -2.619916447452281e-06, -2.720139339193664e-06, -2.824199075612624e-06, -2.932242729125189e-06, -3.044423021602697e-06, -3.160898542149727e-06, -3.281833973317978e-06, -3.407400326100486e-06, -3.537775184042236e-06, -3.673142956821121e-06, -3.813695143680087e-06, -3.95963060708649e-06, -4.111155857034305e-06, -4.268485346384691e-06, -4.431841777703855e-06, -4.601456422034433e-06, -4.777569450069482e-06, -4.960430276234559e-06, -5.150297916156722e-06, -5.34744135808607e-06, -5.55213994877999e-06, -5.764683794456053e-06, -5.985374177390996e-06, -6.214523988781694e-06, -6.452458178525485e-06, -6.699514222581496e-06, -6.956042608609496e-06, -7.222407340610256e-06, -7.49898646333459e-06, -7.786172607226736e-06, -8.084373554736088e-06, -8.394012828838179e-06, -8.715530304656472e-06, -9.049382845096431e-06, -9.396044961469404e-06, -9.756009500075856e-06, -1.012978835582851e-05, -1.051791321394193e-05, -1.092093632087325e-05, -1.133943128564316e-05, -1.177399391277885e-05, -1.222524306815042e-05, -1.269382157900709e-05, -1.318039716962027e-05, -1.368566343394087e-05, -1.421034084678712e-05, -1.475517781511297e-05, -1.532095177097393e-05, -1.590847030789527e-05, -1.651857236238867e-05, -1.715212944245712e-05, -1.781004690501377e-05, -1.849326528417557e-05, 
-1.920276167254964e-05, -1.993955115762533e-05, -2.07046883155811e-05, -2.149926876481702e-05, -2.232443078166439e-05, -2.318135698086455e-05, -2.407127606342393e-05, -2.499546463467055e-05, -2.595524909537202e-05, -2.695200760894579e-05, -2.798717214792194e-05, -2.906223062291634e-05, -3.017872909755078e-05, -3.13382740928962e-05, -3.254253498515234e-05, -3.379324650046058e-05, -3.50922113108852e-05, -3.644130273583685e-05, -3.784246755329199e-05, -3.9297728925469e-05, -4.080918944372614e-05, -4.237903429775053e-05, -4.400953457420559e-05, -4.57030506903748e-05, -4.746203596846681e-05, -4.928904035655254e-05, -5.11867143023578e-05, -5.315781278641187e-05, -5.52051995213339e-05, -5.733185132433852e-05, -5.954086267036738e-05, -6.183545043358853e-05, -6.421895882531179e-05, -6.669486453678356e-05, -6.926678209565676e-05, -7.193846944535995e-05, -7.47138337569789e-05, -7.759693748370246e-05, -8.059200466835825e-05, -8.370342751500084e-05, -8.693577323604363e-05, -9.029379118692875e-05, -9.37824203008836e-05, -9.740679683686033e-05, -0.0001011722624543871, -0.0001050843726296374, -0.0001091489054277165, -0.0001133718706468412, -0.0001177595193507612, -0.000122318353806594, -0.0001270551378459492, -0.0001319769076681194, -0.0001370909831048776, -0.0001424049793674319, -0.0001479268192969249, -0.0001536647461409101, -0.000159627336879258, -0.0001658235161240118, -0.0001722625706188149, -0.0001789541643648199, -0.0001859083544010321, -0.0001931356072685405, -0.000200646816189286, -0.0002084533189914979, -0.0002165669168154039, -0.0002249998936342937, -0.0002337650366277701, -0.0002428756574455321, -0.0002523456144019297, -0.0002621893356433192, -0.0002724218433320971, -0.0002830587788935067, -0.0002941164293731045, -0.0003056117549552398, -0.0003175624176949635, -0.0003299868115183389, -0.0003429040935484089, -0.0003563342168168584, -0.0003702979644239594, -0.000384816985212173, -0.0003999138310218732, -0.0004156119956004618, -0.0004319359552394245, 
-0.0004489112112172019, -0.0004665643341289541, -0.000484923010188097, -0.000504016089587914, -0.0005238736370155062, -0.0005445269844142056, -0.000566008786094672, -0.000588353076299078, -0.0006115953293272488, -0.0006357725223379428, -0.0006609232009432696, -0.0006870875477188919, -0.0007143074537576336, -0.0007426265933992146, -0.0007720905022738025, -0.0008027466588027194, -0.0008346445693047122, -0.0008678358568619929, -0.0009023743541057775, -0.0009383162000868414, -0.0009757199414023822, -0.001014646637756424, -0.001055159972136864, -0.001097326365798336, -0.001141215098245897, -0.001186898432420582, -0.001234451745293812, -0.001283953664083335, -0.001335486208309257, -0.001389134937914016, -0.001444989107675886, -0.001503141828150158, -0.001563690233377424, -0.001626735655602379, -0.001692383807250692, -0.001760744970414888, -0.001831934194102747, -0.001906071499504147, -0.001983282093533162, -0.002063696590902701, -0.002147451244988254, -0.002234688187735092, -0.002325555678860243, -0.002420208364595455, -0.002518807546210883, -0.002621521458550776, -0.002728525558801629, -0.002840002825700723, -0.002956144069376786, -0.003077148251996588, -0.003203222819369283, -0.003334584043635133, -0.003471457377136505, -0.003614077817535292, -0.003762690284203426, -0.003917550005870001, -0.004078922919459771, -0.004247086080003811, -0.004422328081441453, -0.004604949488064737, -0.004795263276280581, -0.00499359528628135, -0.005200284683121609, -0.005415684426595175, -0.005640161749193163, -0.005874098641298871, -0.006117892342637666, -0.006371955838850351, -0.006636718361893851, -0.006912625892793876, -0.007200141665079213, -0.007499746667014786, -0.007811940140520605, -0.008137240074413972, -0.008476183689342623, -0.008829327911485062, -0.00919724983178013, -0.009580547147110582, -0.009979838579502031, -0.01039576426900997, -0.01082898613555146, -0.01128018820449319, -0.01175007689033428, -0.01223938123231783, -0.01274885307527009, -0.01327926718839983, 
-0.0138314213141903, -0.0144061361388865, -0.01500425517541449, -0.01562664454887458, -0.01627419267402098, -0.01694780981338191, -0.0176484275038858, -0.01837699783904252, -0.01913449259288778, -0.01992190217103526, -0.02074023437329896, -0.02159051295145446, -0.02247377594480322, -0.02339107377530013, -0.02434346708310683, -0.02533202428254886, -0.02635781881760012, -0.02742192609519594, -0.02852542007390881, -0.02966936948481949, -0.03085483366079483, -0.03208285794987221, -0.03335446868805596, -0.03467066770658993, -0.03603242634870438, -0.03744067897096712, -0.03889631590474963, -0.04040017585395286, -0.04195303770609683, -0.04355561173518056, -0.04520853017640743, -0.04691233715502155, -0.04866747795411584, -0.05047428760945685, -0.05233297882314398, -0.05424362919234876, -0.05620616775454788, -0.05822036085658876, -0.06028579736172011, -0.06240187321641958, -0.06456777540751069, -0.06678246534979093, -0.06904466175518292, -0.07135282304639556, -0.07370512939124746, -0.07609946444820571, -0.0785333969294, -0.08100416210434058, -0.08350864338586686, -0.08604335415942474, -0.08860442003758068, -0.09118756174370216, -0.09378807885180988, -0.096400834633673, -0.09902024228905974, -0.1016402528604616, -0.1042543451593455, -0.1068555180566725, -0.1094362855157198, -0.11198867476967, -0.1145042280694342, -0.1169740084482001, -0.1193886099674554, -0.1217381729240127, -0.1240124045079217, -0.1262006054061027, -0.1282917028450358, -0.1302742905566295, -0.1321366761332645, -0.1338669362095432, -0.1354529798680358, -0.1368826206128166, -0.1381436571862123, -0.1392239634194311, -0.1401115872049835, -0.1407948585565557, -0.1412625065788312, -0.1415037850043997, -0.1415086057662987, -0.1412676798621349, -0.1407726645287463, -0.1400163154850716, -0.1389926427159522, -0.1376970679623514, -0.1361265817561038, -0.1342798974929298, -0.1321575996802477, -0.1297622831317508, -0.1270986795155662, -0.1241737673053785, -0.1209968608441409, -0.1175796739195083, -0.1139363529824379, 
-0.110083474930752, -0.10604000424501, -0.1018272042237187, -0.09746849713913786, -0.09298926834574577, -0.08841660974359572, -0.08377899855191129, -0.07910590810776688, -0.07442734839316101, -0.06977333523223087, -0.06517328860671395, -0.06065536232600445, -0.05624570936653013, -0.05196768956454688, -0.04784102899871225, -0.04388094331450372, -0.04009724038923311, -0.03649342106630235, -0.03306580013526873, -0.02980688550048022, -0.02671990304583427, -0.02381038546337588, -0.02108254566177908, -0.01853924317968531, -0.01618196328392985, -0.01401080964211426, -0.01202451134426885, -0.01022044491151625, -0.008594671770776774, -0.007141991493821054, -0.005856010897443087, -0.004729228880421765, -0.003753136634099374, -0.002918332609383012, -0.002214651356433173, -0.001631305077867351, -0.00115703645562204, -0.0007802810300769902, -0.0004893371300300209, -0.0002725398464257234, -0.0001184352809778799, -1.596025067931994e-05, 4.537595148049189e-05, 7.532645831634377e-05, 8.27464240092668e-05, 7.544705159490128e-05, 6.006611761291691e-05, 4.195893922143146e-05, 2.51135483369247e-05, 1.209390736001126e-05, 4.015030238307959e-06, 5.538897842605195e-07, -4.163336342344337e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 3 + }, + { + "i": 1, + "radial_function": [ + -3.506920828889683e-08, -3.595698946542638e-08, -3.686724492646535e-08, -3.780054361108229e-08, -3.875746886109918e-08, -3.973861878569813e-08, -4.074460663525869e-08, -4.177606118465869e-08, -4.283362712627798e-08, -4.391796547295204e-08, -4.502975397112515e-08, -4.616968752446346e-08, -4.733847862819148e-08, -4.853685781442394e-08, -4.976557410877143e-08, -5.102539549850437e-08, -5.231710941256927e-08, -5.364152321375646e-08, -5.499946470332662e-08, -5.639178263841296e-08, -5.781934726252015e-08, -5.928305084945383e-08, -6.078380826101932e-08, -6.232255751883853e-08, -6.390026039064285e-08, -6.551790299140695e-08, -6.717649639970174e-08, -6.887707728964919e-08, -7.062070857887558e-08, -7.240848009286806e-08, -7.424150924614832e-08, -7.612094174069132e-08, -7.804795228202384e-08, -8.00237453134512e-08, -8.204955576887167e-08, -8.412664984464658e-08, -8.625632579101227e-08, -8.843991472352551e-08, -9.067878145505095e-08, -9.297432534881088e-08, -9.532798119302833e-08, -9.774122009771378e-08, 
-1.002155504141525e-07, -1.027525186776692e-07, -1.053537105742598e-07, -1.080207519316906e-07, -1.107553097356904e-07, -1.135590931718655e-07, -1.16433854693991e-07, -1.193813911193471e-07, -1.224035447517812e-07, -1.25502204533203e-07, -1.286793072242283e-07, -1.319368386147103e-07, -1.352768347649171e-07, -1.387013832781272e-07, -1.422126246054426e-07, -1.458127533836323e-07, -1.495040198068432e-07, -1.532887310330365e-07, -1.571692526260259e-07, -1.611480100340235e-07, -1.652274901056131e-07, -1.694102426441002e-07, -1.736988820012126e-07, -1.780960887111409e-07, -1.826046111659486e-07, -1.872272673333934e-07, -1.91966946518234e-07, -1.968266111681268e-07, -2.018092987252347e-07, -2.069181235247127e-07, -2.121562787412512e-07, -2.175270383848961e-07, -2.230337593473947e-07, -2.2867988350034e-07, -2.344689398464337e-07, -2.40404546725205e-07, -2.464904140745684e-07, -2.527303457496323e-07, -2.59128241900205e-07, -2.656881014084902e-07, -2.72414024388491e-07, -2.793102147486834e-07, -2.863809828195681e-07, -2.936307480477319e-07, -3.010640417581132e-07, -3.086855099861916e-07, -3.164999163818731e-07, -3.245121451868885e-07, -3.327272042875593e-07, -3.411502283448473e-07, -3.497864820036391e-07, -3.586413631832708e-07, -3.677204064513554e-07, -3.770292864830104e-07, -3.86573821607661e-07, -3.963599774456253e-07, -4.063938706367585e-07, -4.166817726634897e-07, -4.272301137706297e-07, -4.380454869844135e-07, -4.491346522332801e-07, -4.605045405729675e-07, -4.721622585185674e-07, -4.841150924862353e-07, -4.963705133473487e-07, -5.08936181097946e-07, -5.218199496463713e-07, -5.350298717221174e-07, -5.485742039089269e-07, -5.624614118053083e-07, -5.76700175315687e-07, -5.91299394075494e-07, -6.062681930135942e-07, -6.216159280555125e-07, -6.373521919710433e-07, -6.534868203698823e-07, -6.700298978490324e-07, -6.869917642958329e-07, -7.043830213505305e-07, -7.222145390324618e-07, -7.404974625339563e-07, -7.592432191862325e-07, -7.784635256016244e-07, 
-7.981703949966003e-07, -8.183761447001684e-07, -8.390934038523392e-07, -8.603351212974733e-07, -8.821145736774415e-07, -9.044453737296419e-07, -9.273414787950877e-07, -9.508171995418551e-07, -9.748872089093622e-07, -9.995665512790618e-07, -1.024870651877266e-06, -1.050815326416003e-06, -1.077416790977919e-06, -1.104691672151393e-06, -1.132657017422223e-06, -1.161330305828338e-06, -1.190729458884246e-06, -1.220872851781996e-06, -1.251779324875681e-06, -1.283468195456664e-06, -1.315959269826848e-06, -1.349272855677585e-06, -1.38342977478192e-06, -1.41845137600812e-06, -1.454359548662624e-06, -1.491176736170726e-06, -1.52892595010357e-06, -1.567630784560211e-06, -1.607315430913717e-06, -1.648004692930557e-06, -1.689724002272669e-06, -1.73249943439196e-06, -1.776357724827129e-06, -1.82132628591299e-06, -1.867433223912777e-06, -1.914707356584079e-06, -1.96317823118944e-06, -2.012876142962829e-06, -2.063832154043546e-06, -2.116078112889404e-06, -2.16964667418124e-06, -2.224571319231346e-06, -2.28088637690838e-06, -2.33862704509199e-06, -2.39782941267048e-06, -2.458530482095232e-06, -2.520768192506102e-06, -2.584581443442067e-06, -2.650010119152075e-06, -2.717095113521246e-06, -2.785878355627903e-06, -2.856402835947621e-06, -2.928712633220384e-06, -3.00285294199787e-06, -3.078870100887969e-06, -3.156811621514116e-06, -3.236726218207743e-06, -3.318663838452112e-06, -3.402675694096795e-06, -3.488814293362191e-06, -3.57713347365399e-06, -3.667688435208338e-06, -3.760535775588402e-06, -3.855733525054101e-06, -3.953341182827087e-06, -4.053419754273421e-06, -4.156031789027527e-06, -4.261241420080834e-06, -4.369114403859846e-06, -4.479718161318521e-06, -4.593121820070555e-06, -4.709396257588161e-06, -4.828614145493928e-06, -4.950849994973787e-06, -5.076180203339255e-06, -5.204683101767967e-06, -5.336439004252637e-06, -5.471530257788542e-06, -5.610041293831297e-06, -5.752058681056851e-06, -5.897671179456539e-06, -6.046969795801362e-06, -6.20004784050961e-06, 
-6.357000985953765e-06, -6.517927326242909e-06, -6.682927438517827e-06, -6.852104445797487e-06, -7.025564081415617e-06, -7.203414755087994e-06, -7.385767620651597e-06, -7.572736645517673e-06, -7.764438681882582e-06, -7.960993539740254e-06, -8.162524061742345e-06, -8.369156199952584e-06, -8.5810190945431e-06, -8.798245154482321e-06, -9.020970140264114e-06, -9.249333248730425e-06, -9.483477200040075e-06, -9.723548326837767e-06, -9.96969666567962e-06, -1.022207605077144e-05, -1.048084421007884e-05, -1.074616286386902e-05, -1.101819782574536e-05, -1.129711910623857e-05, -1.158310101901818e-05, -1.187632228979124e-05, -1.217696616795613e-05, -1.248522054108045e-05, -1.280127805227563e-05, -1.31253362205401e-05, -1.345759756414707e-05, -1.379826972715356e-05, -1.414756560910921e-05, -1.450570349804657e-05, -1.487290720683488e-05, -1.5249406212983e-05, -1.563543580197844e-05, -1.603123721425178e-05, -1.643705779585823e-05, -1.685315115297015e-05, -1.727977731027689e-05, -1.77172028733907e-05, -1.816570119535976e-05, -1.862555254739243e-05, -1.909704429389859e-05, -1.958047107195771e-05, -2.007613497532504e-05, -2.058434574309068e-05, -2.110542095310924e-05, -2.163968622032007e-05, -2.218747540008229e-05, -2.274913079665056e-05, -2.332500337692186e-05, -2.391545298958621e-05, -2.452084858981765e-05, -2.514156846964562e-05, -2.577800049415002e-05, -2.643054234362672e-05, -2.709960176187468e-05, -2.778559681075878e-05, -2.848895613120687e-05, -2.921011921080367e-05, -2.994953665814736e-05, -3.07076704841403e-05, -3.148499439038798e-05, -3.22819940648862e-05, -3.309916748518004e-05, -3.393702522918288e-05, -3.479609079384915e-05, -3.567690092189827e-05, -3.658000593679322e-05, -3.750597008618194e-05, -3.845537189401424e-05, -3.942880452155377e-05, -4.042687613750856e-05, -4.145021029751027e-05, -4.249944633317794e-05, -4.357523975100675e-05, -4.467826264133091e-05, -4.580920409761261e-05, -4.696877064631872e-05, -4.815768668765097e-05, -4.937669494740279e-05, 
-5.062655694022377e-05, -5.190805344457796e-05, -5.322198498969088e-05, -5.456917235478709e-05, -5.595045708092661e-05, -5.736670199575868e-05, -5.881879175151622e-05, -6.030763337658489e-05, -6.183415684098834e-05, -6.339931563613825e-05, -6.500408736920913e-05, -6.664947437250463e-05, -6.83365043281923e-05, -7.006623090879298e-05, -7.183973443381989e-05, -7.365812254297436e-05, -7.552253088631211e-05, -7.74341238318078e-05, -7.939409519075315e-05, -8.140366896143678e-05, -8.346410009156443e-05, -8.55766752598891e-05, -8.774271367753286e-05, -8.996356790949492e-05, -9.224062471684973e-05, -9.457530592015636e-05, -9.696906928460736e-05, -9.942340942746396e-05, -0.0001019398587483347, -0.0001045199883828681, -0.0001071654091804472, -0.0001098777727064845, -0.0001126587722699338, -0.0001155101439766476, -0.0001184336678092288, -0.0001214311687340341, -0.0001245045178360118, -0.0001276556334820654, -0.0001308864825136565, -0.0001341990814693767, -0.0001375954978382329, -0.0001410778513444157, -0.0001446483152643317, -0.0001483091177767057, -0.0001520625433465756, -0.0001559109341440196, -0.0001598566914984868, -0.00016390227738961, -0.0001680502159754095, -0.0001723030951588183, -0.0001766635681934752, -0.0001811343553297669, -0.0001857182455021103, -0.0001904180980585027, -0.0001952368445333845, -0.0002001774904648847, -0.0002052431172575521, -0.000210436884091694, -0.0002157620298804734, -0.000221221875275949, -0.0002268198247252576, -0.0002325593685781852, -0.0002384440852473879, -0.0002444776434225589, -0.0002506638043398751, -0.0002570064241080741, -0.0002635094560925624, -0.0002701769533589743, -0.0002770130711776426, -0.000284022069590475, -0.0002912083160417601, -0.0002985762880744777, -0.0003061305760937039, -0.000313875886198759, -0.0003218170430857742, -0.0003299589930223869, -0.0003383068068963349, -0.0003468656833397374, -0.000355640951930906, -0.0003646380764755745, -0.0003738626583694578, -0.0003833204400441327, -0.0003930173084982381, 
-0.0004029592989160679, -0.0004131525983756648, -0.0004236035496485646, -0.0004343186550934122, -0.0004453045806456963, -0.0004565681599059117, -0.0004681163983285192, -0.0004799564775140977, -0.0004920957596071772, -0.0005045417918022612, -0.0005173023109606203, -0.0005303852483404956, -0.0005437987344434033, -0.0005575511039792893, -0.0005716509009533572, -0.0005861068838774338, -0.0006009280311088136, -0.0006161235463195853, -0.0006317028640994919, -0.0006476756556954712, -0.0006640518348910608, -0.0006808415640289336, -0.0006980552601799034, -0.0007157036014617844, -0.0007337975335115981, -0.0007523482761146575, -0.0007713673299941462, -0.0007908664837648907, -0.0008108578210550721, -0.0008313537277997384, -0.0008523668997100133, -0.0008739103499220079, -0.0008959974168295036, -0.0009186417721045355, -0.0009418574289101413, -0.0009656587503095529, -0.0009900604578762316, -0.00101507764050923, -0.001040725763458397, -0.001067020677564117, -0.001093978628716244, -0.001121616267537075, -0.001149950659293234, -0.001178999294041419, -0.001208780097013095, -0.001239311439243255, -0.001270612148448459, -0.001302701520159491, -0.001335599329113965, -0.001369325840914414, -0.001403901823957373, -0.001439348561639101, -0.001475687864843672, -0.00151294208471919, -0.001551134125748063, -0.001590287459117223, -0.001630426136394354, -0.001671574803516234, -0.001713758715095307, -0.001757003749050793, -0.001801336421570596, -0.001846783902410379, -0.001893374030536276, -0.001941135330117643, -0.001990097026876483, -0.002040289064800057, -0.002091742123223346, -0.002144487634288022, -0.002198557800784616, -0.002253985614384649, -0.002310804874269434, -0.002369050206162338, -0.002428757081771291, -0.002489961838648238, -0.002552701700472405, -0.002617014797764031, -0.00268294018903531, -0.002750517882385257, -0.00281978885754501, -0.00289079508838026, -0.002963579565857223, -0.003038186321478532, -0.003114660451195415, -0.003193048139802224, -0.003273396685819461, 
-0.00335575452687111, -0.003440171265561994, -0.003526697695860719, -0.003615385829993389, -0.003706288925853317, -0.003799461514931403, -0.003894959430771721, -0.003992839837956607, -0.00409316126162492, -0.004195983617527177, -0.004301368242620479, -0.00440937792620595, -0.004520076941610851, -0.004633531078416961, -0.004749807675236421, -0.004868975653035479, -0.004991105549006024, -0.005116269550984122, -0.005244541532413874, -0.005375997087854445, -0.005510713569026887, -0.005648770121396764, -0.005790247721287526, -0.005935229213518417, -0.006083799349559904, -0.006236044826198164, -0.00639205432469901, -0.006551918550460456, -0.006715730273141356, -0.006883584367252557, -0.007055577853194965, -0.00723180993872743, -0.007412382060845735, -0.007597397928051567, -0.007786963562989068, -0.00798118734542378, -0.008180180055536952, -0.00838405491750565, -0.008592927643336551, -0.008806916476918898, -0.009026142238258959, -0.009250728367855596, -0.009480800971173476, -0.009716488863166778, -0.009957923612803475, -0.01020523958753591, -0.01045857399765993, -0.01071806694050085, -0.01098386144435982, -0.01125610351215034, -0.01153494216464932, -0.01182052948328262, -0.0121130206523595, -0.01241257400066498, -0.01271935104231366, -0.01303351651676215, -0.01335523842787111, -0.01368468808190161, -0.01402204012432267, -0.01436747257530093, -0.01472116686373445, -0.01508330785968573, -0.01545408390506002, -0.01583368684236659, -0.01622231204139164, -0.01662015842360179, -0.01702742848408717, -0.01744432831084311, -0.017871067601178, -0.01830785967502442, -0.01875492148491816, -0.01921247362239796, -0.01968074032056598, -0.02015994945253544, -0.02065033252547875, -0.02115212466997438, -0.02166556462433639, -0.02219089471359494, -0.02272836082277951, -0.02327821236414122, -0.02384070223793211, -0.02441608678634208, -0.0250046257401759, -0.02560658215783279, -0.02622222235613263, -0.02685181583251163, -0.02749563517808936, -0.02815395598108845, -0.02882705672006434, 
-0.02951521864638156, -0.03021872565534846, -0.03093786414539821, -0.03167292286468029, -0.03242419274439994, -0.03319196671821941, -0.03397653952700661, -0.03477820750819145, -0.0355972683689627, -0.03643402094250995, -0.03728876492648897, -0.03816180060285936, -0.03905342853821511, -0.03996394926370179, -0.04089366293358264, -0.04184286896149124, -0.04281186563337681, -0.04380094969612147, -0.04481041592078284, -0.04584055663938407, -0.04689166125415253, -0.04796401571807836, -0.04905790198564197, -0.05017359743253664, -0.05131137424318888, -0.05247149876486235, -0.05365423082711008, -0.0548598230253272, -0.0560885199671415, -0.05734055748036953, -0.05861616178126081, -0.05991554860174829, -0.06123892227442479, -0.06258647477397221, -0.06395838471377917, -0.06535481629650208, -0.06677591821734498, -0.06822182251886431, -0.069692643396142, -0.07118847595121323, -0.07270939489569253, -0.07425545320060188, -0.07582668069247961, -0.07742308259493488, -0.0790446380149057, -0.08069129837299448, -0.08236298577737261, -0.08405959134088929, -0.08578097344117301, -0.08752695592368581, -0.08929732624788378, -0.09109183357684478, -0.09291018681095764, -0.09475205256652026, -0.09661705310036911, -0.0985047641819687, -0.1004147129147129, -0.1023463755085498, -0.1042991750064241, -0.1062724789674473, -0.1082655971101547, -0.1102777789196874, -0.1123082112232528, -0.114356015738771, -0.1164202466022006, -0.1184998878796685, -0.1205938510711914, -0.1227009726134893, -0.1248200113901379, -0.1269496462580975, -0.1290884736004964, -0.1312350049164155, -0.13338766445935, -0.1355447869369784, -0.1377046152858803, -0.1398652985358867, -0.1420248897798282, -0.1441813442655746, -0.1463325176284059, -0.1484761642829476, -0.150609935995114, -0.1527313806557373, -0.1548379412788128, -0.1569269552485519, -0.1589956538406955, -0.1610411620448008, -0.1630604987154496, -0.1650505770815409, -0.167008205644003, -0.1689300894933684, -0.1708128320797165, -0.1726529374684291, -0.1744468131160686, 
-0.1761907732014074, -0.1778810425472067, -0.1795137611687485, -0.1810849894853107, -0.182590714230744, -0.1840268550990137, -0.1853892721599667, -0.1866737740796735, -0.1878761271783917, -0.1889920653575094, -0.1900173009246824, -0.1909475363437413, -0.1917784769327922, -0.1925058445301822, -0.193125392143652, -0.1936329195929642, -0.1940242901505645, -0.1942954481783427, -0.1944424377512773, -0.1944614222506363, -0.1943487049004192, -0.1941007502108522, -0.1937142062819488, -0.1931859279084139, -0.1925130004144984, -0.1916927641337929, -0.1907228394344254, -0.1896011521747, -0.1883259594579653, -0.1868958755384659, -0.1853098977122282, -0.1835674320087646, -0.1816683184806795, -0.179612855869328, -0.1774018254056788, -0.1750365134867357, -0.1725187329495477, -0.1698508426472723, -0.1670357650153634, -0.164077001301069, -0.1609786441165321, -0.1577453869653783, -0.154382530385223, -0.1508959843446984, -0.1472922665339105, -0.1435784961923779, -0.139762383129166, -0.1358522116067338, -0.1318568187837487, -0.1277855674434117, -0.1236483127733939, -0.1194553630119597, -0.1152174338327589, -0.1109455964087, -0.1066512191735297, -0.1023459033885522, -0.0980414127213349, -0.09374959715302564, -0.08948231165067942, -0.08525133016986942, -0.08106825568973679, -0.07694442712593226, -0.07289082411446723, -0.06891797080884944, -0.06503583998068455, -0.06125375885646613, -0.05758031825600208, -0.05402328671558482, -0.0505895313757725, -0.047284947482729, -0.04411439838620491, -0.04108166790840721, -0.0381894268975802, -0.03543921565920163, -0.03283144376691394, -0.03036540848543035, -0.02803933267959519, -0.02585042262906409, -0.02379494560931761, -0.02186832643100488, -0.02006526134717626, -0.01837984684085773, -0.01680571979615535, -0.01533620444167279, -0.01396446024762272, -0.01268362367606369, -0.01148693535278099, -0.01036784288316701, -0.009320756357612536, -0.008343045936590465, -0.007432812852864346, -0.006588167677698542, -0.005807217134807974, 
-0.005088048963478253, -0.004428715349863471, -0.003827215529111833, -0.003281478238476854, -0.002789344769925345, -0.002348553427074851, -0.001956726231787831, -0.001611358746870559, -0.001309813879079094, -0.001049320496583156, -0.000826977633051576, -0.0006397649517071863, -0.0004845600025396718, -0.0003581626194665635, -0.0002573265679453873, -0.0001787984647918447, -0.0001193634905681806, -7.589555307338514e-05, -4.541112299121311e-05, -2.512709420001563e-05, -1.251734506828653e-05, -5.365781722357216e-06, -1.811516240913733e-06, -3.813029983983258e-07, -3.424517964882058e-09, 3.721016875076159e-09, -2.394569775581931e-08, -1.001573612718562e-08, 1.734723475976807e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 1 + }, + { + "i": 1, + "radial_function": [ + -5.777137821780009e-13, -5.997893821184943e-13, -6.227085349860045e-13, -6.465034746332536e-13, -6.712076666335485e-13, -6.968558553473171e-13, -7.234841127871735e-13, -7.511298893502091e-13, -7.798320664888651e-13, -8.096310113944824e-13, -8.405686337704053e-13, -8.726884447745105e-13, -9.060356182140554e-13, -9.406570540789024e-13, -9.7660144450249e-13, -1.013919342243289e-12, -1.052663231783095e-12, -1.092887603142136e-12, -1.134649028514785e-12, -1.178006241833723e-12, -1.223020221374368e-12, -1.269754275515829e-12, -1.318274131778943e-12, -1.368648029266651e-12, -1.420946814636726e-12, -1.475244041741777e-12, -1.531616075076766e-12, -1.590142197179426e-12, -1.650904720134686e-12, -1.713989101339948e-12, -1.779484063693937e-12, -1.847481720378271e-12, -1.918077704407177e-12, -1.991371303127593e-12, -2.067465597858787e-12, -2.146467608867898e-12, -2.228488445885325e-12, -2.313643464371596e-12, -2.402052427755572e-12, -2.493839675872083e-12, -2.589134299835926e-12, -2.688070323598195e-12, -2.790786892440271e-12, -2.897428468670519e-12, -3.008145034799094e-12, -3.123092304476339e-12, -3.242431941491708e-12, -3.366331787141103e-12, -3.494966096282346e-12, -3.628515782410944e-12, -3.767168672100616e-12, -3.911119769166647e-12, -4.060571528923443e-12, -4.215734142922102e-12, -4.376825834568456e-12, -4.544073166037231e-12, -4.71771135691417e-12, -4.897984615014172e-12, -5.08514647984068e-12, -5.279460179169468e-12, -5.48119899925821e-12, -5.690646669202707e-12, -5.908097759980116e-12, 
-6.133858098740563e-12, -6.368245198929848e-12, -6.611588706847921e-12, -6.864230865271596e-12, -7.126526994793186e-12, -7.398845993552238e-12, -7.681570856063215e-12, -7.97509921186871e-12, -8.279843884775796e-12, -8.596233473462258e-12, -8.924712954268945e-12, -9.26574430702645e-12, -9.6198071647958e-12, -9.987399488437518e-12, -1.036903826695745e-11, -1.076526024461442e-11, -1.11766226758126e-11, -1.160370410883986e-11, -1.20471051995547e-11, -1.250744955616631e-11, -1.298538461629547e-11, -1.348158255755032e-11, -1.399674124289715e-11, -1.453158520215631e-11, -1.508686665100362e-11, -1.566336654891003e-11, -1.626189569750808e-11, -1.688329588092944e-11, -1.752844104971776e-11, -1.819823854998179e-11, -1.889363039951753e-11, -1.961559461269443e-11, -2.036514657596841e-11, -2.114334047595738e-11, -2.195127078208686e-11, -2.279007378589135e-11, -2.366092919913687e-11, -2.456506181301117e-11, -2.550374322071692e-11, -2.647829360588895e-11, -2.749008359935243e-11, -2.854053620683259e-11, -2.963112881032691e-11, -3.076339524595615e-11, -3.193892796121513e-11, -3.315938025465831e-11, -3.442646860117041e-11, -3.57419750660911e-11, -3.710774981159143e-11, -3.852571369882542e-11, -3.999786098951747e-11, -4.152626215078586e-11, -4.311306676714537e-11, -4.476050656378785e-11, -4.647089854538993e-11, -4.824664825486501e-11, -5.009025315664154e-11, -5.200430614922644e-11, -5.399149921199513e-11, -5.605462719133599e-11, -5.819659173147594e-11, -6.042040535551507e-11, -6.272919570241014e-11, -6.512620992586767e-11, -6.761481926133245e-11, -7.019852376749626e-11, -7.28809572489956e-11, -7.566589236722041e-11, -7.855724594642631e-11, -8.155908448261063e-11, -8.467562986290167e-11, -8.791126530350604e-11, -9.127054151456554e-11, -9.475818310059546e-11, -9.837909520550713e-11, -1.021383704115603e-10, -1.060412959019521e-10, -1.10093360897111e-10, -1.143002643751651e-10, -1.186679230874347e-10, -1.232024798802302e-10, -1.279103123346586e-10, -1.327980417365897e-10, 
-1.378725423894026e-10, -1.431409512826088e-10, -1.486106781299547e-10, -1.54289415791121e-10, -1.601851510916797e-10, -1.663061760565269e-10, -1.726610995725966e-10, -1.792588594972558e-10, -1.861087352294174e-10, -1.932203607610505e-10, -2.006037382274492e-10, -2.082692519753229e-10, -2.162276831684915e-10, -2.244902249517373e-10, -2.330684981941381e-10, -2.419745678340346e-10, -2.512209598486189e-10, -2.608206788720178e-10, -2.707872264866569e-10, -2.811346202136246e-10, -2.918774132287759e-10, -3.030307148322775e-10, -3.14610211700418e-10, -3.266321899495701e-10, -3.391135580433271e-10, -3.520718705750785e-10, -3.655253529594328e-10, -3.794929270672627e-10, -3.939942378404196e-10, -4.090496809235466e-10, -4.246804313519044e-10, -4.409084733355122e-10, -4.577566311815536e-10, -4.752486013985322e-10, -4.934089860273248e-10, -5.122633272460734e-10, -5.318381432975407e-10, -5.521609657895238e-10, -5.732603784207885e-10, -5.951660571869835e-10, -6.179088121231622e-10, -6.415206306415695e-10, -6.66034722525717e-10, -6.914855666440585e-10, -7.179089594489225e-10, -7.453420653290548e-10, -7.738234688864886e-10, -8.033932292114102e-10, -8.340929362313242e-10, -8.659657692138013e-10, -8.990565575051914e-10, -9.334118435906801e-10, -9.690799485644832e-10, -1.00611104010231e-09, -1.044557203031657e-09, -1.08447251259939e-09, -1.12591311053954e-09, -1.1689372840485e-09, -1.213605547778696e-09, -1.259980728966074e-09, -1.308128055811354e-09, -1.358115249239218e-09, -1.410012618164752e-09, -1.463893158401127e-09, -1.519832655347684e-09, -1.577909790603076e-09, -1.638206252653369e-09, -1.700806851790962e-09, -1.765799639426121e-09, -1.833276031958892e-09, -1.903330939385983e-09, -1.976062898823393e-09, -2.051574213132908e-09, -2.129971094847546e-09, -2.211363815598437e-09, -2.295866861253739e-09, -2.383599092987656e-09, -2.474683914506566e-09, -2.569249445667562e-09, -2.667428702733773e-09, -2.76935978552039e-09, -2.875186071694723e-09, -2.985056418503933e-09, 
-3.099125372214442e-09, -3.217553385557849e-09, -3.340507043489715e-09, -3.468159297578781e-09, -3.600689709356973e-09, -3.738284702972763e-09, -3.881137827503472e-09, -4.029450029296377e-09, -4.183429934721594e-09, -4.343294143735399e-09, -4.509267534667282e-09, -4.681583580659766e-09, -4.860484678207292e-09, -5.046222488256192e-09, -5.239058290346789e-09, -5.43926335029626e-09, -5.647119301940155e-09, -5.862918543470546e-09, -6.08696464892897e-09, -6.31957279543408e-09, -6.561070206745875e-09, -6.81179661379138e-09, -7.072104732800991e-09, -7.34236076172891e-09, -7.622944895657367e-09, -7.914251861911178e-09, -8.21669147563637e-09, -8.530689216626359e-09, -8.856686828208415e-09, -9.19514293903472e-09, -9.546533708654646e-09, -9.911353497777958e-09, -1.029011556417449e-08, -1.068335278519076e-08, -1.109161840790294e-08, -1.151548682796353e-08, -1.195555439824009e-08, -1.241244026838676e-08, -1.288678725653219e-08, -1.337926275431385e-08, -1.38905596665351e-08, -1.44213973867704e-08, -1.497252281029562e-08, -1.554471138577194e-08, -1.61387682071676e-08, -1.675552914745861e-08, -1.739586203570757e-08, -1.806066787918304e-08, -1.87508821322432e-08, -1.946747601377606e-08, -2.021145787505574e-08, -2.09838746199456e-08, -2.178581317945473e-08, -2.261840204272887e-08, -2.348281284663888e-08, -2.438026202621189e-08, -2.531201252823578e-08, -2.62793755904595e-08, -2.728371258890131e-08, -2.832643695587669e-08, -2.940901617145637e-08, -3.053297383116857e-08, -3.169989179286956e-08, -3.291141240581672e-08, -3.416924082509665e-08, -3.547514741468147e-08, -3.683097024251107e-08, -3.823861767113246e-08, -3.970007104755989e-08, -4.121738749616272e-08, -4.279270281853361e-08, -4.44282345044408e-08, -4.61262848581284e-08, -4.788924424438905e-08, -4.97195944590304e-08, -5.161991222836749e-08, -5.359287284307969e-08, -5.564125393100362e-08, -5.776793937472691e-08, -5.99759233793091e-08, -6.226831469580396e-08, -6.464834100690555e-08, -6.711935348036892e-08, -6.96848314974283e-08, 
-7.234838756228261e-08, -7.511377239976563e-08, -7.798488024876085e-08, -8.096575435831563e-08, -8.406059269492743e-08, -8.727375386848694e-08, -9.060976328571671e-08, -9.407331953943774e-08, -9.766930104357278e-08, -1.014027729220884e-07, -1.052789941628832e-07, -1.093034250456753e-07, -1.134817348560176e-07, -1.17819809894268e-07, -1.22323761792958e-07, -1.26999936153256e-07, -1.318549215131353e-07, -1.368955586600313e-07, -1.421289503012063e-07, -1.475624711059482e-07, -1.532037781332036e-07, -1.59060821660419e-07, -1.651418564281011e-07, -1.714554533171999e-07, -1.780105114754468e-07, -1.848162709093073e-07, -1.918823255611407e-07, -1.992186368888889e-07, -2.068355479682917e-07, -2.147437981379343e-07, -2.229545382077369e-07, -2.314793462529193e-07, -2.403302440163777e-07, -2.495197139421777e-07, -2.590607168651514e-07, -2.689667103827198e-07, -2.792516679335709e-07, -2.899300986121609e-07, -3.010170677469422e-07, -3.125282182719874e-07, -3.244797929227381e-07, -3.368886572879576e-07, -3.497723237509354e-07, -3.631489763550223e-07, -3.77037496628051e-07, -3.914574904052642e-07, -4.064293156865229e-07, -4.219741115710474e-07, -4.381138283093788e-07, -4.548712585167805e-07, -4.722700695943514e-07, -4.903348374028576e-07, -5.090910812393267e-07, -5.285653001685672e-07, -5.487850107586177e-07, -5.697787862792294e-07, -5.915762974177476e-07, -6.142083545728221e-07, -6.377069517871312e-07, -6.621053123834802e-07, -6.874379363715391e-07, -7.137406496926593e-07, -7.410506553772084e-07, -7.694065866887866e-07, -7.988485623308242e-07, -8.294182437993482e-07, -8.611588949669998e-07, -8.941154439818144e-07, -9.283345475762214e-07, -9.638646578773044e-07, -1.000756091822074e-06, -1.039061103274427e-06, -1.078833957952566e-06, -1.120131011281816e-06, -1.163010789281121e-06, -1.207534072610066e-06, -1.253763983896567e-06, -1.301766078476359e-06, -1.351608438683344e-06, -1.403361771824996e-06, -1.457099511993358e-06, -1.512897925866212e-06, -1.570836222648639e-06, 
-1.630996668329004e-06, -1.693464704419164e-06, -1.758329071349154e-06, -1.825681936718848e-06, -1.895619028580578e-06, -1.968239773972936e-06, -2.043647442901583e-06, -2.121949297993995e-06, -2.203256750053146e-06, -2.287685519744787e-06, -2.375355805668522e-06, -2.466392459067839e-06, -2.560925165448192e-06, -2.659088633379103e-06, -2.761022790773263e-06, -2.866872988944228e-06, -2.976790214754798e-06, -3.090931311191464e-06, -3.209459206700869e-06, -3.332543153642088e-06, -3.460358976235929e-06, -3.593089328389108e-06, -3.730923961797307e-06, -3.87406000475796e-06, -4.022702252113096e-06, -4.17706346679294e-06, -4.337364693433944e-06, -4.503835584560105e-06, -4.676714739857755e-06, -4.856250059067849e-06, -5.042699109076292e-06, -5.236329505773014e-06, -5.437419311305425e-06, -5.646257447353576e-06, -5.86314412510051e-06, -6.088391292596557e-06, -6.322323100232195e-06, -6.565276385080631e-06, -6.817601174909573e-06, -7.079661212672923e-06, -7.351834502351622e-06, -7.634513877037834e-06, -7.928107590207227e-06, -8.233039931149059e-06, -8.549751865586655e-06, -8.878701702551833e-06, -9.220365788624938e-06, -9.575239230714088e-06, -9.943836648592823e-06, -1.03266929584483e-05, -1.072436418879848e-05, -1.113742833014556e-05, -1.156648621982064e-05, -1.201216246354989e-05, -1.247510639530935e-05, -1.295599307713632e-05, -1.345552434063785e-05, -1.397442987198822e-05, -1.451346834233248e-05, -1.507342858556892e-05, -1.565513082557962e-05, -1.6259427955087e-05, -1.688720686840325e-05, -1.75393898504488e-05, -1.821693602451201e-05, -1.892084286137353e-05, -1.965214775250104e-05, -2.041192965015964e-05, -2.120131077744949e-05, -2.202145841134879e-05, -2.2873586742082e-05, -2.375895881218905e-05, -2.467888853893335e-05, -2.563474282375716e-05, -2.662794375276163e-05, -2.765997089230548e-05, -2.873236368405345e-05, -2.98467239440179e-05, -3.10047184703065e-05, -3.220808176457498e-05, -3.345861887238845e-05, -3.475820834795368e-05, -3.610880534896108e-05, 
-3.751244486752947e-05, -3.897124510355819e-05, -4.04874109871067e-05, -4.206323785668993e-05, -4.370111530081693e-05, -4.540353117032033e-05, -4.717307576954124e-05, -4.901244623471323e-05, -5.092445110836137e-05, -5.29120151189549e-05, -5.497818417549864e-05, -5.712613058723153e-05, -5.935915851911397e-05, -6.168070969429117e-05, -6.409436935530513e-05, -6.660387249639426e-05, -6.921311037982373e-05, -7.192613734987002e-05, -7.47471779587135e-05, -7.768063441925205e-05, -8.07310944005661e-05, -8.390333918254967e-05, -8.72023521870904e-05, -9.063332790397124e-05, -9.420168123067907e-05, -9.791305724616531e-05, -0.0001017733414396681, -0.0001057886704167514, -0.0001099654431058242, -0.0001143103324895267, -0.0001188302978866853, -0.0001235325978117254, -0.0001284248034398096, -0.0001335148127074112, -0.0001388108650795201, -0.000144321557016093, -0.0001500558581722521, -0.0001560231283681703, -0.0001622331353665675, -0.0001686960734975299, -0.0001754225831723368, -0.0001824237713300913, -0.0001897112328630513, -0.0001972970730688156, -0.0002051939311799834, -0.0002134150050242286, -0.0002219740768704122, -0.0002308855405190989, -0.0002401644296985086, -0.0002498264478301053, -0.0002598879992308983, -0.0002703662218229076, -0.0002812790214235087, -0.0002926451076938777, -0.0003044840318264679, -0.0003168162260561636, -0.0003296630450837438, -0.0003430468095043712, -0.0003569908513381575, -0.0003715195617641725, -0.0003866584411639789, -0.0004024341515853996, -0.0004188745717423641, -0.000436008854671511, -0.0004538674881717741, -0.0004724823581584452, -0.0004918868150687917, -0.00051211574346234, -0.0005332056349645004, -0.0005551946647086418, -0.0005781227714377693, -0.0006020317414334288, -0.0006269652964459556, -0.0006529691858068558, -0.0006800912829108157, -0.0007083816862617314, -0.0007378928252839599, -0.0007686795711071036, -0.0008007993525394144, -0.0008343122774520531, -0.0008692812598032652, -0.0009057721525383695, -0.0009438538866082152, 
-0.0009835986163552902, -0.001025081871522966, -0.001068382716149372, -0.001113583914613098, -0.00116077210510312, -0.001210037980790068, -0.001261476478980007, -0.001315186978535408, -0.00137127350585032, -0.001429844949668638, -0.001491015285034526, -0.001554903806663663, -0.001621635372021489, -0.00169134065439124, -0.001764156406208627, -0.001840225732932831, -0.00191969837771351, -0.002002731017101305, -0.002089487568034239, -0.002180139506314402, -0.002274866196767598, -0.002373855235253633, -0.002477302802665277, -0.002585414031020247, -0.002698403381711436, -0.002816495035936484, -0.0029399232972774, -0.003068933006344188, -0.003203779967332556, -0.003344731386274493, -0.003492066320680174, -0.003646076140181457, -0.00380706499768786, -0.003975350310457323, -0.004151263250363088, -0.004335149242505314, -0.004527368471170296, -0.004728296391979687, -0.004938324248897294, -0.005157859594569654, -0.005387326812267601, -0.005627167637470115, -0.005877841676884368, -0.006139826922429712, -0.006413620257424232, -0.006699737951900507, -0.006998716143641765, -0.007311111301167514, -0.007637500664509987, -0.007978482659207014, -0.008334677278491593, -0.008706726428184946, -0.009095294228293126, -0.009501067264771283, -0.009924754784349486, -0.01036708882471193, -0.01082882427168622, -0.01131073883443011, -0.01181363292890219, -0.01233832945916819, -0.01288567348532973, -0.0134565317660668, -0.01405179216296167, -0.01467236289292339, -0.01531917161416094, -0.01599316433026316, -0.01669530409604265, -0.01742656950788876, -0.0181879529604653, -0.01898045865068468, -0.01980510030900279, -0.02066289863722116, -0.02155487843116143, -0.0224820653658125, -0.02344548241985486, -0.0244461459158558, -0.02548506115193106, -0.02656321760029454, -0.02768158364790109, -0.0288411008543541, -0.03004267770242198, -0.03128718281693596, -0.03257543762853498, -0.03390820845975166, -0.03528619801231474, -0.03671003623632803, -0.03818027056423906, -0.03969735549525055, 
-0.04126164151914594, -0.04287336337242627, -0.04453262762425304, -0.04623939959504444, -0.04799348961670263, -0.04979453865046141, -0.0516420032862803, -0.05353514015662379, -0.0554729898074575, -0.05745436008036109, -0.05947780907191703, -0.06154162774998805, -0.06364382232118758, -0.06578209645983323, -0.06795383352591257, -0.07015607891813198, -0.07238552272790168, -0.07463848288108865, -0.07691088897649373, -0.07919826705311948, -0.08149572554229573, -0.08379794268539519, -0.08609915572296187, -0.08839315218633514, -0.09067326364786728, -0.09293236231024551, -0.09516286083871235, -0.09735671586155688, -0.09950543558352137, -0.1016000919729162, -0.1036313379955059, -0.105589430375641, -0.107464258366648, -0.1092453790070664, -0.1109220593256555, -0.1124833259349523, -0.1139180224191034, -0.115214874875282, -0.1163625659077502, -0.1173498172979597, -0.118165481481509, -0.1187986418517607, -0.1192387217790579, -0.1194756020824719, -0.1194997465167433, -0.1193023346397147, -0.1188754012045594, -0.1182119809764264, -0.1173062576051571, -0.1161537148955911, -0.1147512885064651, -0.1130975157807596, -0.1111926810682752, -0.1090389535500964, -0.1066405142206139, -0.1040036683334901, -0.1011369392824995, -0.09805113957736199, -0.09475941430096943, -0.09127725221214524, -0.08762245950328222, -0.0838150911527269, -0.07987733484705015, -0.07583334260912997, -0.07170900557630197, -0.06753166785121847, -0.06332977601929103, -0.05913246181285159, -0.05496905652408864, -0.05086853714477527, -0.04685890585523748, -0.04296650640744662, -0.0392152831503552, -0.03562599092391269, -0.03221536678579023, -0.02899527750337036, -0.0259718599005581, -0.02314467443560322, -0.02050589572530508, -0.01804377623234737, -0.01575720075657511, -0.01364724867987743, -0.01171359969688779, -0.009954546148718435, -0.008367021778945605, -0.006946647439589115, -0.005687794064872687, -0.004583662992046378, -0.003626383441753367, -0.00280712667550137, -0.002116236025934137, -0.001543371647924563, 
-0.001077668466683446, -0.0007079054055009748, -0.0004226835626256475, -0.0002106105774816458, -6.048798417457313e-05, 3.85021008428299e-05, 9.66150676549038e-05, 0.0001233719673273959, 0.0001274032494677224, 0.0001163087193072065, 9.654063444652916e-05, 7.332504497230274e-05, 5.061873332320999e-05, 3.111239570541269e-05, 1.628625405793171e-05, 6.525479822237779e-06, 1.302689173605237e-06, -5.653395760139065e-07, -5.798832308223645e-07, -1.351074895503124e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 2 + }, + { + "i": 1, + "radial_function": [ + -5.879861360765048e-13, -6.104542618297848e-13, -6.337809396185867e-13, -6.579989764359323e-13, -6.831424328959005e-13, -7.092466711370063e-13, -7.363484045560729e-13, -7.644857494425413e-13, -7.936982785858116e-13, -8.24027076931062e-13, -8.555147993617512e-13, -8.882057306901341e-13, -9.221458479401328e-13, -9.573828850101498e-13, -9.939663998068057e-13, -1.031947843943955e-12, -1.071380635105074e-12, -1.112320232170757e-12, -1.15482421321697e-12, -1.198952356493809e-12, -1.24476672449858e-12, -1.292331751261523e-12, -1.341714332966868e-12, -1.392983922036735e-12, -1.446212624810159e-12, -1.501475302954614e-12, -1.558849678752715e-12, -1.618416444412106e-12, -1.680259375552318e-12, -1.744465449028212e-12, -1.811124965255637e-12, -1.880331675211479e-12, -1.952182912286579e-12, -2.026779729177059e-12, -2.104227040006551e-12, -2.184633767879158e-12, -2.268112998070779e-12, -2.354782137074156e-12, -2.444763077721363e-12, -2.538182370615996e-12, -2.635171402116073e-12, -2.735866579118102e-12, -2.840409520902093e-12, -2.948947258307359e-12, -3.061632440519285e-12, -3.178623549757765e-12, -3.300085124169401e-12, -3.426187989236856e-12, -3.557109498030821e-12, -3.693033780642559e-12, -3.834152003147701e-12, -3.980662636465679e-12, -4.132771735492784e-12, -4.290693228901473e-12, -4.454649220013629e-12, -4.624870299170615e-12, -4.801595868039807e-12, -4.985074476313525e-12, -5.1755641712739e-12, -5.373332860715398e-12, -5.578658689735315e-12, -5.791830431922302e-12, -6.013147895493005e-12, -6.242922344947992e-12, -6.481476938840194e-12, -6.729147184271191e-12, -6.986281408754987e-12, -7.253241250112528e-12, -7.530402165086233e-12, -7.818153957389822e-12, -8.116901325935859e-12, -8.427064434012462e-12, 
-8.749079500209363e-12, -9.083399411924487e-12, -9.430494362314185e-12, -9.790852511582272e-12, -1.016498067353878e-11, -1.055340502839336e-11, -1.095667186278617e-11, -1.137534833809693e-11, -1.181002328811269e-11, -1.226130804717629e-11, -1.272983730998019e-11, -1.321627002421482e-11, -1.372129031732718e-11, -1.424560845869268e-11, -1.478996185855382e-11, -1.53551161051306e-11, -1.594186604136107e-11, -1.655103688278672e-11, -1.718348537815451e-11, -1.784010101436837e-11, -1.852180726748439e-11, -1.922956290150944e-11, -1.996436331683005e-11, -2.072724195016718e-11, -2.151927172802718e-11, -2.234156657569186e-11, -2.319528298387082e-11, -2.408162163521928e-11, -2.500182909300827e-11, -2.595719955432352e-11, -2.69490766702578e-11, -2.797885543565687e-11, -2.904798415107784e-11, -3.015796645971709e-11, -3.131036346217548e-11, -3.250679591203283e-11, -3.374894649532075e-11, -3.503856219709987e-11, -3.637745675846879e-11, -3.776751322746202e-11, -3.921068660742374e-11, -4.070900660658163e-11, -4.226458049269009e-11, -4.387959605675424e-11, -4.555632469000687e-11, -4.729712457846352e-11, -4.910444401954916e-11, -5.098082486546232e-11, -5.292890609811728e-11, -5.495142754069534e-11, -5.705123371102272e-11, -5.923127782219629e-11, -6.149462593608368e-11, -6.384446127553813e-11, -6.62840887013955e-11, -6.881693936054815e-11, -7.144657551163396e-11, -7.417669553512832e-11, -7.701113913488337e-11, -7.995389273843333e-11, -8.300909510365905e-11, -8.61810431396987e-11, -8.947419795029188e-11, -9.289319110805423e-11, -9.644283116851098e-11, -1.001281104330502e-10, -1.039542119703049e-10, -1.079265169058454e-10, -1.120506119904269e-10, -1.163322974574454e-10, -1.207775951806466e-10, -1.253927571435663e-10, -1.301842742326123e-10, -1.351588853661541e-10, -1.403235869724639e-10, -1.456856428298376e-10, -1.512525942827357e-10, -1.570322708483151e-10, -1.630328012282666e-10, -1.69262624741451e-10, -1.757305031934104e-10, -1.824455331994507e-10, -1.894171589786303e-10, 
-1.966551856366432e-10, -2.041697929562885e-10, -2.11971549714917e-10, -2.200714285489928e-10, -2.28480821386684e-10, -2.37211555470176e-10, -2.462759099902614e-10, -2.556866333565865e-10, -2.654569611278586e-10, -2.756006346272294e-10, -2.86131920269029e-10, -2.970656296240599e-10, -3.084171402516417e-10, -3.202024173277332e-10, -3.324380360995475e-10, -3.451412051982278e-10, -3.58329790842411e-10, -3.72022341966678e-10, -3.862381163102768e-10, -4.009971075027951e-10, -4.163200731848671e-10, -4.322285642035185e-10, -4.487449549231492e-10, -4.65892474694845e-10, -4.836952405282636e-10, -5.021782910120302e-10, -5.213676215304112e-10, -5.412902208257349e-10, -5.619741089580452e-10, -5.834483767153658e-10, -6.057432265299791e-10, -6.288900149583502e-10, -6.529212967843645e-10, -6.778708708079786e-10, -7.037738273836849e-10, -7.306665977756144e-10, -7.585870053987926e-10, -7.875743190185382e-10, -8.176693079829002e-10, -8.4891429956582e-10, -8.813532385016418e-10, -9.150317487948122e-10, -9.499971978915954e-10, -9.862987633041933e-10, -1.023987501780932e-09, -1.063116421119805e-09, -1.103740554726489e-09, -1.145917039021583e-09, -1.189705193806099e-09, -1.235166605698194e-09, -1.282365214758483e-09, -1.331367404425942e-09, -1.382242094890748e-09, -1.435060840035541e-09, -1.489897928081478e-09, -1.546830486080571e-09, -1.605938588401485e-09, -1.667305369361268e-09, -1.731017140161558e-09, -1.797163510293793e-09, -1.865837513584131e-09, -1.937135739055616e-09, -2.011158466791447e-09, -2.088009808990721e-09, -2.167797856415048e-09, -2.250634830431964e-09, -2.336637240869333e-09, -2.425926049902532e-09, -2.518626842205266e-09, -2.61487000160331e-09, -2.714790894479676e-09, -2.818530060189503e-09, -2.926233408752314e-09, -3.038052426100007e-09, -3.154144387169408e-09, -3.274672577138994e-09, -3.399806521121542e-09, -3.529722222635419e-09, -3.664602411190481e-09, -3.80463679933689e-09, -3.950022349538341e-09, -4.100963551245763e-09, -4.257672708560862e-09, 
-4.420370238894749e-09, -4.589284983041828e-09, -4.764654527105153e-09, -4.946725536726806e-09, -5.135754104093089e-09, -5.332006108203384e-09, -5.535757588909597e-09, -5.747295135252537e-09, -5.966916288642016e-09, -6.194929961447986e-09, -6.431656871592048e-09, -6.67742999375102e-09, -6.932595027807604e-09, -7.197510885207776e-09, -7.472550193909313e-09, -7.758099822632508e-09, -8.054561425150884e-09, -8.362352005388324e-09, -8.681904504118166e-09, -9.013668408090044e-09, -9.35811038244244e-09, -9.715714927291045e-09, -1.008698505941745e-08, -1.047244302001816e-08, -1.087263100951025e-08, -1.128811195042858e-08, -1.171947027948866e-08, -1.216731276993065e-08, -1.263226938530242e-08, -1.311499416588432e-08, -1.361616614900357e-08, -1.413649032453501e-08, -1.467669862693338e-08, -1.523755096519489e-08, -1.581983629219831e-08, -1.642437371493238e-08, -1.705201364717302e-08, -1.770363900623432e-08, -1.838016645547919e-08, -1.908254769434008e-08, -1.981177079766729e-08, -2.05688616062921e-08, -2.135488517076358e-08, -2.217094725029396e-08, -2.301819586902409e-08, -2.389782293180289e-08, -2.481106590175726e-08, -2.575920954201662e-08, -2.674358772404783e-08, -2.776558530514789e-08, -2.882664007774209e-08, -2.992824479323511e-08, -3.107194926326754e-08, -3.225936254134125e-08, -3.349215518788846e-08, -3.477206162197891e-08, -3.610088256298132e-08, -3.748048756562118e-08, -3.891281765201169e-08, -4.039988804436843e-08, -4.194379100226299e-08, -4.354669876841766e-08, -4.52108666271961e-08, -4.693863608010582e-08, -4.873243814279163e-08, -5.059479676817308e-08, -5.252833240055966e-08, -5.453576566574516e-08, -5.661992120228811e-08, -5.878373163949316e-08, -6.103024172740696e-08, -6.336261262512467e-08, -6.578412635301135e-08, -6.829819041533793e-08, -7.09083425999783e-08, -7.361825596158492e-08, -7.643174399575291e-08, -7.935276601094501e-08, -8.23854327063302e-08, -8.5534011962977e-08, -8.880293485675838e-08, -9.219680190148386e-08, -9.572038953106665e-08, 
-9.937865682985533e-08, -1.031767525206764e-07, -1.071200222204364e-07, -1.112140159735089e-07, -1.154644960738484e-07, -1.198774451863645e-07, -1.2445907477951e-07, -1.292158338808737e-07, -1.341544181680644e-07, -1.392817794078407e-07, -1.446051352569564e-07, -1.501319794382773e-07, -1.55870092307027e-07, -1.618275518217061e-07, -1.680127449353883e-07, -1.744343794238619e-07, -1.811014961665817e-07, -1.880234818986937e-07, -1.952100824516855e-07, -2.026714165017075e-07, -2.104179898447858e-07, -2.184607102193855e-07, -2.268109026974844e-07, -2.354803256656286e-07, -2.44481187418551e-07, -2.538261633901086e-07, -2.635284140441302e-07, -2.736016034521928e-07, -2.840599185836401e-07, -2.949180893361056e-07, -3.06191409334627e-07, -3.178957575281351e-07, -3.300476206160374e-07, -3.426641163347405e-07, -3.557630176381206e-07, -3.693627778066915e-07, -3.834825565206469e-07, -3.981422469341066e-07, -4.133625037893203e-07, -4.29164772610415e-07, -4.455713200190654e-07, -4.626052652146332e-07, -4.802906126638581e-07, -4.986522860472771e-07, -5.177161635095946e-07, -5.375091142663113e-07, -5.580590366166588e-07, -5.793948974188791e-07, -6.015467730837693e-07, -6.245458921445386e-07, -6.484246794649559e-07, -6.732168021481876e-07, -6.989572172127907e-07, -7.256822211034792e-07, -7.53429501108175e-07, -7.822381887544666e-07, -8.121489152644544e-07, -8.432038691417285e-07, -8.754468559821797e-07, -9.089233605852984e-07, -9.436806114615729e-07, -9.79767647827346e-07, -1.017235389181502e-06, -1.056136707565996e-06, -1.096526502615697e-06, -1.138461779501031e-06, -1.182001729882813e-06, -1.227207815988353e-06, -1.274143857938295e-06, -1.322876124442417e-06, -1.373473427002869e-06, -1.426007217756545e-06, -1.480551691099134e-06, -1.537183889239775e-06, -1.595983811837109e-06, -1.657034529880037e-06, -1.720422303972318e-06, -1.786236707198444e-06, -1.854570752745762e-06, -1.925521026475708e-06, -1.999187824622851e-06, -2.075675296842945e-06, -2.155091594798787e-06, 
-2.237549026515118e-06, -2.323164216720929e-06, -2.412058273420082e-06, -2.504356960923933e-06, -2.60019087961193e-06, -2.69969565267748e-06, -2.803012120134686e-06, -2.910286540366586e-06, -3.021670799526433e-06, -3.137322629080011e-06, -3.25740583182881e-06, -3.382090516734852e-06, -3.511553342905781e-06, -3.645977773087859e-06, -3.785554337058133e-06, -3.930480905293293e-06, -4.080962973333001e-06, -4.237213957251786e-06, -4.399455500685313e-06, -4.567917793871437e-06, -4.742839905172428e-06, -4.924470125587344e-06, -5.113066326762227e-06, -5.308896333036982e-06, -5.512238308091661e-06, -5.723381156769705e-06, -5.942624942683786e-06, -6.170281322240241e-06, -6.406673995733344e-06, -6.652139176190448e-06, -6.907026076688116e-06, -7.171697416868693e-06, -7.446529949438234e-06, -7.731915007443174e-06, -8.02825907315693e-06, -8.33598436946325e-06, -8.65552947462166e-06, -8.987349961375746e-06, -9.331919061378115e-06, -9.689728355963908e-06, -1.006128849432875e-05, -1.044712994023615e-05, -1.084780374840542e-05, -1.126388237177494e-05, -1.169596050093251e-05, -1.214465593697469e-05, -1.261061049920599e-05, -1.309449096906483e-05, -1.359699007178507e-05, -1.411882749732201e-05, -1.466075096215935e-05, -1.522353731367821e-05, -1.580799367883559e-05, -1.641495865898222e-05, -1.70453035727136e-05, -1.769993374875172e-05, -1.837978987091951e-05, -1.908584937735797e-05, -1.981912791626191e-05, -2.058068086045366e-05, -2.137160488326363e-05, -2.219303959826666e-05, -2.304616926553707e-05, -2.393222456721097e-05, -2.485248445524695e-05, -2.580827807443234e-05, -2.680098676377285e-05, -2.783204613958429e-05, -2.890294826371784e-05, -3.001524390051682e-05, -3.117054486625804e-05, -3.237052647499388e-05, -3.36169300848855e-05, -3.491156574929356e-05, -3.625631497709349e-05, -3.765313360686782e-05, -3.910405479983599e-05, -4.061119215660656e-05, -4.217674296305572e-05, -4.380299157087637e-05, -4.549231291858085e-05, -4.724717619901689e-05, -4.907014867971244e-05, 
-5.096389968264594e-05, -5.293120473034971e-05, -5.497494986556811e-05, -5.709813615197082e-05, -5.930388436384688e-05, -6.159543987296083e-05, -6.397617774121359e-05, -6.644960802807819e-05, -6.901938132221501e-05, -7.168929450710835e-05, -7.446329677097262e-05, -7.734549587168628e-05, -8.03401646679863e-05, -8.345174792863882e-05, -8.668486943189108e-05, -9.00443393680136e-05, -9.353516205836695e-05, -9.716254400502048e-05, -0.000100931902285591, -0.0001048488733086417, -0.0001089193219456959, -0.0001131493510566189, -0.0001175453114259178, -0.0001221138121282944, -0.0001268617313426297, -0.0001317962276344446, -0.0001369247517277891, -0.0001422550587884866, -0.0001477952212416512, -0.0001535536421473965, -0.0001595390691597992, -0.000165760609095298, -0.0001722277431378152, -0.000178950342709276, -0.0001859386860353318, -0.0001932034754375075, -0.0002007558553844121, -0.0002086074313360387, -0.0002167702894167644, -0.0002252570169541987, -0.0002340807239226624, -0.0002432550653318418, -0.0002527942646028638, -0.0002627131379759371, -0.0002730271199956106, -0.000283752290121639, -0.0002949054005155855, -0.0003065039050553578, -0.000318565989632119, -0.0003311106037862866, -0.0003441574937417181, -0.0003577272368995839, -0.0003718412778560071, -0.0003865219660100939, -0.0004017925948316626, -0.0004176774428607631, -0.0004342018165138578, -0.000451392094774464, -0.0004692757758489852, -0.0004878815258715533, -0.0005072392297446902, -0.0005273800442058695, -0.0005483364532131332, -0.000570142325746266, -0.0005928329761232009, -0.0006164452269346866, -0.0006410174747035762, -0.000666589758378307, -0.0006932038307735201, -0.0007209032330740341, -0.0007497333725214724, -0.0007797416034061748, -0.0008109773114898543, -0.0008434920019875479, -0.0008773393912400666, -0.0009125755022107308, -0.0009492587639426437, -0.0009874501151146719, -0.001027213111836191, -0.001068614039822112, -0.001111722031090545, -0.0011566091853262, -0.001203350696052456, -0.001252024981754465, 
-0.001302713822094374, -0.001355502499357577, -0.001410479945266012, -0.001467738893290507, -0.001527376036589177, -0.001589492191692639, -0.001654192468049018, -0.001721586443532943, -0.001791788346011658, -0.001864917241049018, -0.001941097225813397, -0.002020457629238929, -0.002103133218470155, -0.002189264411598425, -0.002278997496673663, -0.00237248485694719, -0.002469885202270001, -0.002571363806535981, -0.002677092751020287, -0.002787251173419691, -0.002902025522353562, -0.003021609817030456, -0.003146205911726764, -0.003276023764658467, -0.00341128171075599, -0.003552206737773644, -0.003699034765079437, -0.003852010924377319, -0.004011389841511689, -0.004177435918392624, -0.004350423613959459, -0.004530637722968642, -0.004718373651250142, -0.00491393768592238, -0.005117647258889847, -0.005329831201768729, -0.005550829990192924, -0.00578099597524686, -0.00602069359954906, -0.006270299595273865, -0.006530203161145249, -0.006800806115166303, -0.007082523019561497, -0.007375781274102887, -0.007681021173669113, -0.007998695925544138, -0.008329271621602463, -0.008673227160149278, -0.009031054111786141, -0.00940325652325762, -0.009790350652800853, -0.01019286463006952, -0.01061133803323791, -0.01104632137540956, -0.01149837549196113, -0.01196807081994834, -0.01245598656018725, -0.01296270971210773, -0.01348883397095646, -0.01403495847641112, -0.01460168640116015, -0.01518962336750831, -0.01579937567959613, -0.01643154835837583, -0.01708674296607919, -0.01776555520655236, -0.0184685722875279, -0.0191963700306733, -0.01994950971510397, -0.02072853463999768, -0.02153396639201324, -0.0223663008034103, -0.02322600358712348, -0.02411350563556315, -0.02502919797064473, -0.02597342633349476, -0.0269464854034774, -0.02794861263766921, -0.0289799817236916, -0.03004069564094631, -0.03113077932780599, -0.03225017195522719, -0.03339871881062716, -0.03457616279971187, -0.03578213557832167, -0.0370161483312987, -0.03827758222090669, -0.03956567853351037, -0.04087952856004402, 
-0.04221806325333528, -0.04358004271360269, -0.04496404556243525, -0.04636845827532574, -0.04779146455333648, -0.04923103482575513, -0.0506849159876132, -0.05215062148865295, -0.05362542190371979, -0.05510633612851291, -0.05659012335910084, -0.05807327602844982, -0.05955201388828402, -0.06102227943973265, -0.06247973493117318, -0.06391976115622991, -0.06533745829871265, -0.0667276490840338, -0.06808488450795325, -0.06940345242287825, -0.07067738926892767, -0.07190049524096462, -0.07306635318319232, -0.07416835149903396, -0.07519971135510713, -0.07615351844340172, -0.07702275954441065, -0.07780036410507069, -0.07847925100804348, -0.07905238066214929, -0.07951281248674971, -0.07985376779462783, -0.0800686979975731, -0.08015135796565498, -0.08009588426436931, -0.07989687787294922, -0.0795494908518082, -0.0790495162772582, -0.07839348059755769, -0.07757873738661625, -0.07660356128138175, -0.07546724068766948, -0.07417016762917421, -0.07271392289852845, -0.07110135445123612, -0.06933664676763965, -0.0674253787002547, -0.06537456713031994, -0.06319269358575284, -0.06088971083155357, -0.05847702634271887, -0.05596745951973925, -0.05337516951956281, -0.05071555066315214, -0.04800509255792206, -0.04526120235322635, -0.04250198694373995, -0.0397459934628319, -0.03701190707880021, -0.0343182059328441, -0.03168277404833397, -0.0291224742029358, -0.02665268409116774, -0.02428680061274042, -0.02203571879334702, -0.01990729366380247, -0.01790579536655111, -0.01603136979177891, -0.0142795191239112, -0.01264314995907856, -0.01112136967765878, -0.009714536759781622, -0.008422126171612254, -0.007242746414909712, -0.006174166956420007, -0.00521335613098689, -0.004356529455522146, -0.003599208119634434, -0.002936287234223289, -0.00236211322328056, -0.001870569538904548, -0.001455169667667622, -0.001109156181062054, -0.0008256043675160529, -0.0005975287722258943, -0.0004179907685121359, -0.0002802050953922552, -0.0001776431261154982, -0.0001041304862775677, -5.393620548907463e-05, 
-2.185064212483628e-05, -3.25172823302522e-06, 5.843649983273969e-06, 8.749495752344294e-06, 8.099967126588142e-06, 5.861840267917112e-06, 3.371526674577491e-06, 1.398037334657612e-06, 2.326742951147195e-07, -1.944476724396104e-07, -1.717743623175227e-07, -3.929345979494792e-08, 3.469446951953614e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 3 + }, + { + "i": 2, + "radial_function": [ + 3.813985231564481e-08, 3.910536725527554e-08, 4.009532432102142e-08, 4.111034226820216e-08, 4.21510555160007e-08, 4.321811454399522e-08, 4.431218629873007e-08, 4.543395461057894e-08, 4.658412062116066e-08, 4.776340322157567e-08, 4.897253950173576e-08, 5.021228521106908e-08, 5.148341523088795e-08, 5.278672405871454e-08, 5.412302630486769e-08, 5.549315720162001e-08, 5.689797312524532e-08, 5.833835213128131e-08, 5.981519450334214e-08, 6.132942331582532e-08, 6.288198501086212e-08, 6.447384998987468e-08, 6.61060132201079e-08, 6.777949485651587e-08, 6.949534087939197e-08, 7.125462374813901e-08, 7.305844307159174e-08, 7.490792629530713e-08, 7.680422940625374e-08, 7.874853765534145e-08, 8.074206629824009e-08, 8.27860613549538e-08, 8.488180038862316e-08, 8.703059330404277e-08, 8.923378316639438e-08, 9.14927470407044e-08, 9.380889685255413e-08, 9.618368027057808e-08, 9.861858161130304e-08, 1.011151227668936e-07, 1.036748641563823e-07, 1.062994057009819e-07, 1.089903878240867e-07, 1.117494924765894e-07, 1.145784441881543e-07, 1.174790111451019e-07, 1.204530062955825e-07, 1.235022884827257e-07, 1.266287636064764e-07, 1.298343858148431e-07, 1.331211587253015e-07, 1.364911366771186e-07, 1.399464260153806e-07, 1.434891864075239e-07, 1.471216321931967e-07, 1.508460337682898e-07, 1.546647190040059e-07, 1.585800747018526e-07, 1.625945480854681e-07, 1.667106483302135e-07, 1.709309481314846e-07, 1.752580853127285e-07, 1.796947644741655e-07, 1.842437586832483e-07, 1.88907911207917e-07, 1.936901372937289e-07, 1.98593425985979e-07, 2.036208419979455e-07, 2.087755276264313e-07, 2.14060704715798e-07, 2.194796766717172e-07, 2.25035830525902e-07, 2.307326390531068e-07, 2.365736629417187e-07, 2.425625530192987e-07, 2.487030525344595e-07, 2.549989994965127e-07, 2.614543290743419e-07, 2.680730760560042e-07, 2.748593773705974e-07, 2.818174746739657e-07, 2.889517169998644e-07, 2.962665634782403e-07, 
3.037665861223212e-07, 3.114564726862662e-07, 3.193410295951513e-07, 3.274251849491321e-07, 3.357139916036559e-07, 3.442126303276468e-07, 3.529264130416459e-07, 3.618607861379169e-07, 3.710213338846073e-07, 3.80413781916083e-07, 3.900440008116185e-07, 3.999180097646882e-07, 4.100419803451361e-07, 4.204222403565935e-07, 4.310652777915416e-07, 4.419777448864981e-07, 4.531664622798636e-07, 4.646384232750165e-07, 4.764007982113349e-07, 4.884609389458665e-07, 5.008263834484535e-07, 5.135048605131855e-07, 5.265042945891134e-07, 5.398328107332625e-07, 5.534987396890251e-07, 5.675106230931138e-07, 5.818772188143303e-07, 5.966075064274779e-07, 6.117106928258534e-07, 6.271962179758146e-07, 6.430737608170229e-07, 6.593532453120562e-07, 6.760448466491554e-07, 6.931589976020044e-07, 7.107063950505006e-07, 7.286980066665979e-07, 7.471450777694061e-07, 7.660591383538158e-07, 7.85452010297063e-07, 8.053358147477177e-07, 8.257229797017293e-07, 8.46626247770258e-07, 8.680586841441393e-07, 8.900336847599771e-07, 9.125649846729539e-07, 9.356666666415996e-07, 9.593531699298843e-07, 9.836392993321218e-07, 1.00854023442635e-06, 1.034071539061951e-06, 1.06024917108745e-06, 1.087089492324575e-06, 1.114609278794793e-06, 1.142825731204741e-06, 1.171756485697089e-06, 1.201419624873555e-06, 1.231833689096972e-06, 1.263017688079444e-06, 1.294991112763876e-06, 1.327773947506261e-06, 1.361386682566372e-06, 1.395850326914638e-06, 1.43118642136323e-06, 1.467417052029546e-06, 1.504564864140528e-06, 1.542653076186424e-06, 1.58170549443285e-06, 1.621746527800202e-06, 1.662801203119757e-06, 1.704895180775955e-06, 1.748054770744668e-06, 1.792306949037469e-06, 1.837679374562162e-06, 1.884200406410152e-06, 1.931899121581415e-06, 1.980805333158168e-06, 2.030949608938614e-06, 2.082363290542345e-06, 2.135078512999433e-06, 2.189128224835371e-06, 2.244546208664463e-06, 2.301367102304543e-06, 2.359626420426121e-06, 2.41936057674968e-06, 2.48060690680476e-06, 2.543403691265239e-06, 2.607790179875333e-06, 
2.673806615981185e-06, 2.741494261683566e-06, 2.81089542362718e-06, 2.882053479442869e-06, 2.955012904859179e-06, 3.029819301500146e-06, 3.106519425386892e-06, 3.185161216160575e-06, 3.265793827045153e-06, 3.348467655568645e-06, 3.433234375061961e-06, 3.520146966955242e-06, 3.609259753891583e-06, 3.700628433679098e-06, 3.794310114102423e-06, 3.890363348615326e-06, 3.988848172937004e-06, 4.089826142574597e-06, 4.193360371295607e-06, 4.299515570574227e-06, 4.408358090036057e-06, 4.519955958926814e-06, 4.634378928630547e-06, 4.751698516264283e-06, 4.871988049376159e-06, 4.995322711774911e-06, 5.121779590519648e-06, 5.251437724098891e-06, 5.384378151829281e-06, 5.520683964504731e-06, 5.660440356327532e-06, 5.803734678154217e-06, 5.95065649208896e-06, 6.101297627459108e-06, 6.255752238207544e-06, 6.414116861737666e-06, 6.576490479248178e-06, 6.742974577594831e-06, 6.913673212718182e-06, 7.088693074676911e-06, 7.268143554327065e-06, 7.452136811689453e-06, 7.640787846047237e-06, 7.834214567818047e-06, 8.032537872245328e-06, 8.235881714954778e-06, 8.444373189423599e-06, 8.658142606410271e-06, 8.877323575395116e-06, 9.102053088082165e-06, 9.33247160401447e-06, 9.568723138356787e-06, 9.810955351899796e-06, 1.00593196433427e-05, 1.031397124391161e-05, 1.057506931437253e-05, 1.084277704450039e-05, 1.111726175506519e-05, 1.139869500239977e-05, 1.168725268561437e-05, 1.19831151565245e-05, 1.228646733236158e-05, 1.259749881133598e-05, 1.291640399112515e-05, 1.324338219036081e-05, 1.357863777319061e-05, 1.3922380276993e-05, 1.427482454332396e-05, 1.463619085217835e-05, 1.500670505964928e-05, 1.538659873907117e-05, 1.57761093257358e-05, 1.617548026527014e-05, 1.658496116577004e-05, 1.700480795378409e-05, 1.743528303424502e-05, 1.787665545444906e-05, 1.8329201072185e-05, 1.879320272811846e-05, 1.926895042253889e-05, 1.975674149657941e-05, 2.025688081802322e-05, 2.076968097181189e-05, 2.129546245537518e-05, 2.183455387890398e-05, 2.238729217069137e-05, 2.295402278767044e-05, 
2.353509993127968e-05, 2.413088676879139e-05, 2.474175566024091e-05, 2.536808839109832e-05, 2.601027641082821e-05, 2.666872107748597e-05, 2.73438339085038e-05, 2.803603683782272e-05, 2.874576247953099e-05, 2.947345439817394e-05, 3.021956738590337e-05, 3.098456774663997e-05, 3.176893358742612e-05, 3.257315511715037e-05, 3.339773495283094e-05, 3.424318843364841e-05, 3.511004394292437e-05, 3.599884323824673e-05, 3.691014178994723e-05, 3.784450912814341e-05, 3.880252919856018e-05, 3.978480072735431e-05, 4.079193759516899e-05, 4.18245692206512e-05, 4.288334095367242e-05, 4.396891447849675e-05, 4.508196822714878e-05, 4.622319780323884e-05, 4.739331641650947e-05, 4.85930553283754e-05, 4.98231643087331e-05, 5.108441210432659e-05, 5.23775869189608e-05, 5.370349690586119e-05, 5.506297067248833e-05, 5.645685779812058e-05, 5.788602936452847e-05, 5.935137850007167e-05, 6.085382093755672e-05, 6.239429558620495e-05, 6.397376511808519e-05, 6.559321656937835e-05, 6.725366195684794e-05, 6.895613890990018e-05, 7.070171131862871e-05, 7.249146999824657e-05, 7.432653337032019e-05, 7.620804816122964e-05, 7.813719011828914e-05, 8.011516474397621e-05, 8.214320804872388e-05, 8.422258732274738e-05, 8.635460192738427e-05, 8.854058410644091e-05, 9.078189981805151e-05, 9.307994958756591e-05, 9.543616938199846e-05, 9.785203150658167e-05, 0.0001003290455239819, 0.0001028687591967507, 0.0001054727594535964, 0.0001081426733800777, 0.0001108801692343373, 0.000113686957488503, 0.0001165647918964093, 0.0001195154705882988, 0.0001225408371931866, 0.0001256427819895843, 0.0001288232430853007, 0.0001320842076270483, 0.0001354277130406118, 0.0001388558483023463, 0.0001423707552427959, 0.0001459746298832435, 0.0001496697238060174, 0.0001534583455594121, 0.0001573428620980899, 0.0001613257002598595, 0.0001654093482797499, 0.0001695963573423133, 0.0001738893431731267, 0.0001782909876704748, 0.0001828040405782275, 0.0001874313212009501, 0.0001921757201623045, 0.0001970402012078394, 0.0002020278030532778, 
0.0002071416412794526, 0.0002123849102750621, 0.0002177608852284446, 0.0002232729241696121, 0.0002289244700638005, 0.0002347190529578337, 0.0002406602921806327, 0.0002467518985992198, 0.000252997676931627, 0.0002594015281181264, 0.0002659674517522523, 0.0002726995485731175, 0.000279602023020558, 0.0002866791858546884, 0.000293935456841482, 0.0003013753675060315, 0.0003090035639551912, 0.0003168248097713338, 0.0003248439889790141, 0.0003330661090863604, 0.0003414963042030667, 0.0003501398382369121, 0.0003590021081707594, 0.000368088647422065, 0.0003774051292869543, 0.0003869573704709817, 0.0003967513347087494, 0.0004067931364745926, 0.0004170890447866283, 0.0004276454871064839, 0.0004384690533371056, 0.0004495664999210962, 0.0004609447540420832, 0.0004726109179317019, 0.0004845722732848212, 0.0004968362857857097, 0.0005094106097479197, 0.0005223030928707019, 0.0005355217811148781, 0.000549074923701131, 0.0005629709782337606, 0.0005772186159530329, 0.0005918267271193175, 0.0006068044265322815, 0.0006221610591885115, 0.0006379062060809863, 0.0006540496901439277, 0.0006706015823466383, 0.0006875722079400093, 0.0007049721528594965, 0.0007228122702884283, 0.000741103687385617, 0.000759857812181348, 0.0007790863406458825, 0.0007988012639347729, 0.0008190148758153264, 0.0008397397802786996, 0.0008609888993422066, 0.0008827754810465065, 0.0009051131076524971, 0.0009280157040428044, 0.0009514975463329123, 0.0009755732706970852, 0.001000257882414327, 0.001025566765139822, 0.001051515690407341, 0.001078120827368288, 0.001105398752773176, 0.001133366461201441, 0.001162041375545683, 0.001191441357756525, 0.001221584719854432, 0.001252490235215023, 0.001284177150134473, 0.001316665195681868, 0.001349974599845431, 0.001384126099979759, 0.001419140955561373, 0.001455040961259985, 0.00149184846033317, 0.001529586358352198, 0.001568278137267009, 0.001607947869818514, 0.001648620234306509, 0.00169032052972179, 0.001733074691251155, 0.001776909306164203, 0.001821851630091085, 
0.001867929603700453, 0.001915171869787199, 0.001963607790779674, 0.002013267466676337, 0.002064181753422017, 0.002116382281734115, 0.002169901476389418, 0.002224772575982317, 0.002281029653165497, 0.002338707635384415, 0.002397842326117068, 0.002458470426630884, 0.002520629558268713, 0.002584358285276209, 0.002649696138183165, 0.002716683637751493, 0.002785362319503007, 0.002855774758840227, 0.002927964596773798, 0.003001976566270412, 0.00307785651923522, 0.003155651454143274, 0.003235409544334543, 0.003317180166987485, 0.003401013932786412, 0.003486962716298091, 0.003575079687073462, 0.003665419341490463, 0.003758037535354374, 0.003852991517272353, 0.003950339962818989, 0.004050143009510292, 0.00415246229260348, 0.004257360981740447, 0.004364903818453055, 0.004475157154548494, 0.004588188991393581, 0.004704069020116808, 0.004822868662747447, 0.004944661114311233, 0.005069521385902351, 0.005197526348751906, 0.005328754779313095, 0.005463287405383696, 0.005601206953286789, 0.005742598196130564, 0.005887548003168821, 0.006036145390283445, 0.006188481571610705, 0.006344650012333364, 0.00650474648266054, 0.006668869113017864, 0.00683711845047021, 0.007009597516399593, 0.007186411865461111, 0.007367669645839384, 0.007553481660828787, 0.00774396143176004, 0.007939225262296247, 0.00813939230412139, 0.008344584624043846, 0.00855492727253814, 0.008770548353747293, 0.008991579096968487, 0.009218153929644327, 0.009450410551881694, 0.00968849001252018, 0.009932536786771399, 0.01018269885545026, 0.01043912778581888, 0.01070197881406289, 0.01097141092941996, 0.01124758695997907, 0.01153067366016851, 0.01182084179994997, 0.01211826625573457, 0.01242312610303653, 0.01273560471087803, 0.01305588983795835, 0.01338417373059864, 0.01372065322247211, 0.01406552983612811, 0.01441900988631656, 0.0147813045851171, 0.01515263014887596, 0.01553320790694984, 0.01592326441225568, 0.01632303155362046, 0.01673274666992384, 0.01715265266602263, 0.01758299813044269, 0.01802403745482095, 
0.01847603095507576, 0.01893924499427966, 0.01941395210720502, 0.019900431126507, 0.02039896731050438, 0.02090985247251287, 0.02143338511167941, 0.02196987054526041, 0.02251962104227946, 0.02308295595849387, 0.02366020187259077, 0.02425169272352634, 0.02485776994891259, 0.0254787826243467, 0.02611508760356894, 0.02676704965932403, 0.02743504162478974, 0.02811944453542526, 0.02882064777107805, 0.02953904919817631, 0.03027505531181816, 0.03102908137755444, 0.03180155157264605, 0.03259289912655859, 0.03340356646044047, 0.03423400532531028, 0.03508467693865866, 0.03595605211914943, 0.03684861141907974, 0.03776284525423698, 0.03869925403076222, 0.03965834826860382, 0.04064064872111544, 0.04164668649032146, 0.04267700313734133, 0.0437321507874292, 0.04481269222904824, 0.04591920100636248, 0.04705226150448481, 0.04821246902678125, 0.04940042986348173, 0.05061676135080197, 0.05186209191973074, 0.0531370611335799, 0.05444231971334302, 0.05577852954984423, 0.05714636370159841, 0.05854650637723776, 0.05997965290128744, 0.06144650966200387, 0.06294779403990666, 0.06448423431555834, 0.06605656955505766, 0.06766554947162244, 0.06931193426154533, 0.07099649441270484, 0.0727200104837098, 0.0744832728516482, 0.07628708142629363, 0.07813224532850738, 0.08001958253044374, 0.08194991945503977, 0.08392409053213033, 0.08594293770838753, 0.08800730990813632, 0.09011806244193951, 0.09227605635968644, 0.09448215774475122, 0.0967372369456086, 0.09904216774112201, 0.1013978264355218, 0.1038050908789049, 0.1062648394088849, 0.1087779497088116, 0.1113452975777748, 0.1139677556073803, 0.1166461917600704, 0.1193814678435273, 0.122174437875468, 0.1250259463329037, 0.1279368262796948, 0.1309078973659923, 0.1339399636929147, 0.1370338115355614, 0.1401902069172315, 0.1434098930274665, 0.1466935874763094, 0.150041979376942, 0.1534557262486386, 0.1569354507317744, 0.1604817371064222, 0.1640951276059015, 0.1677761185164806, 0.1715251560542997, 0.1753426320104816, 0.1792288791553176, 0.1831841663923852, 
0.1872086936534642, 0.1913025865251703, 0.1954658905983492, 0.1996985655314436, 0.2040004788193034, 0.2083713992592378, 0.2128109901065231, 0.2173188019121146, 0.2218942650359297, 0.2265366818298309, 0.2312452184853309, 0.2360188965420718, 0.2408565840543531, 0.2457569864143457, 0.2507186368322271, 0.2557398864752575, 0.2608188942698413, 0.2659536163729017, 0.2711417953214394, 0.2763809488719863, 0.2816683585448284, 0.2870010578913486, 0.2923758205067208, 0.2977891478143854, 0.3032372566534152, 0.3087160667049404, 0.3142211877993215, 0.3197479071517946, 0.3252911765807871, 0.3308455997701509, 0.3364054196441275, 0.3419645059319489, 0.3475163430077109, 0.353054018100373, 0.3585702099786068, 0.3640571782256422, 0.369506753230217, 0.3749103270313308, 0.3802588451665266, 0.3855427996860223, 0.3907522235080155, 0.3958766863038504, 0.4009052921154576, 0.4058266789213386, 0.4106290203813631, 0.415300030004579, 0.4198269679979216, 0.4241966510670507, 0.4283954654531791, 0.4324093835015775, 0.4362239840680525, 0.4398244770787968, 0.4431957325662887, 0.4463223145088581, 0.4491885198037788, 0.4517784227027339, 0.4540759250336589, 0.4560648125237793, 0.4577288175243293, 0.4590516884174033, 0.4600172659587883, 0.4606095667766817, 0.4608128742040984, 0.460611836571555, 0.4599915730254506, 0.4589377868654325, 0.4574368863100467, 0.4554761125031592, 0.4530436744630848, 0.4501288905512252, 0.4467223358964665, 0.4428159950549876, 0.4384034190118558, 0.4334798854405643, 0.4280425609292512, 0.4220906636579393, 0.4156256247702061, 0.4086512464260424, 0.401173854251717, 0.3932024416190021, 0.3847488028926658, 0.3758276524849404, 0.3664567262525389, 0.3566568614707979, 0.3464520513262264, 0.335869469590148, 0.324939460880163, 0.3136954916918384, 0.3021740572009562, 0.2904145387082445, 0.2784590065367782, 0.2663519632114785, 0.2541400218654081, 0.2418715150457604, 0.2295960294505311, 0.2173638626332312, 0.2052253983861088, 0.193230398370489, 0.1814272086244612, 0.1698618808598835, 
0.158577209978067, 0.147611690999317, 0.1369984006248272, 0.1267638109353862, 0.1169265452787711, 0.107496089198887, 0.09847147229860709, 0.0898399401786237, 0.0815847235444892, 0.07371530010502347, 0.0662468847281279, 0.05919220579228868, 0.05256136437167314, 0.04636171352155616, 0.04059776042796659, 0.03527109410148388, 0.0303803411613931, 0.02592115207090229, 0.02188621994563422, 0.01826533376640493, 0.01504546748505442, 0.01221090612191267, 0.009743409520465929, 0.00762241395541885, 0.005825271293054768, 0.004327524887466239, 0.003103220874188906, 0.002125253003483651, 0.001365736097954984, 0.0007964024142110837, 0.0003890338867149623, 0.0001159269776722649, -4.964634759208053e-05, -0.0001329544057030452, -0.0001573155398869597, -0.0001436032936786291, -0.000109752309762845, -7.026991308284325e-05, -3.575871131572539e-05, -1.245587356574207e-05, -1.795335764942507e-06, -1.110223024625157e-16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 2 + }, + { + "i": 2, + "radial_function": [ + 4.045914479497745e-15, 4.253352943694258e-15, 4.471427004834642e-15, 4.700681961592276e-15, 4.941691070698119e-15, 5.195056980380604e-15, 5.461413237299735e-15, 5.741425870743241e-15, 6.035795058046142e-15, 6.345256875398335e-15, 6.670585138417744e-15, 7.012593337091819e-15, 7.372136669925525e-15, 7.750114182382268e-15, 8.147471014965134e-15, 8.565200766559376e-15, 9.00434797894633e-15, 9.466010748700926e-15, 9.951343473003854e-15, 1.046155973623486e-14, 1.099793534456428e-14, 1.156181151613174e-14, 1.215459823478858e-14, 1.277777777579026e-14, 1.343290841225502e-14, 1.412162831165616e-14, 1.484565963209221e-14, 1.560681282857696e-14, 1.640699118011758e-14, 1.724819554890121e-14, 1.813252938348959e-14, 1.906220397853336e-14, 2.003954400415725e-14, 2.106699331884275e-14, 2.214712108034392e-14, 2.328262816991551e-14, 2.447635394591897e-14, 2.573128334369252e-14, 2.705055433943937e-14, 2.843746579679755e-14, 2.989548571571107e-14, 3.14282599042303e-14, 3.303962109492418e-14, 3.473359852870082e-14, 3.651442803000108e-14, 3.838656259855681e-14, 4.035468354420118e-14, 4.242371219257161e-14, 4.459882219097669e-14, 4.688545244519835e-14, 4.928932071957632e-14, 5.181643793438454e-14, 5.447312319624837e-14, 5.726601959918702e-14, 6.020211083579338e-14, 6.328873866008373e-14, 
6.653362124568861e-14, 6.994487248528646e-14, 7.353102227953871e-14, 7.73010378662615e-14, 8.126434624316368e-14, 8.543085774022428e-14, 8.98109908006493e-14, 9.441569803237333e-14, 9.925649359525083e-14, 1.043454819924135e-13, 1.096953883377936e-13, 1.15319590175493e-13, 1.212321509305632e-13, 1.274478550748445e-13, 1.339822450957891e-13, 1.408516603607175e-13, 1.480732779736836e-13, 1.556651557271124e-13, 1.636462772556153e-13, 1.720365995048815e-13, 1.808571026343529e-13, 1.901298424784577e-13, 1.998780056975844e-13, 2.10125967756704e-13, 2.208993538766096e-13, 2.322251031101926e-13, 2.441315357039721e-13, 2.566484239133144e-13, 2.698070664484268e-13, 2.836403667372611e-13, 2.981829152010428e-13, 3.134710757481447e-13, 3.295430767025857e-13, 3.46439106394528e-13, 3.642014136517843e-13, 3.828744134436323e-13, 4.025047979410848e-13, 4.231416532713287e-13, 4.448365822582845e-13, 4.67643833456177e-13, 4.916204367987988e-13, 5.168263462036302e-13, 5.433245894874138e-13, 5.711814259680472e-13, 6.004665121468601e-13, 6.312530758855953e-13, 6.63618099513597e-13, 6.976425123230779e-13, 7.334113929338116e-13, 7.710141820332217e-13, 8.105449060238824e-13, 8.521024121376142e-13, 8.95790615604102e-13, 9.417187594920817e-13, 9.900016878727937e-13, 1.040760132988801e-12, 1.094121017146202e-12, 1.150217770085114e-12, 1.209190662622055e-12, 1.271187157398416e-12, 1.336362277612164e-12, 1.404878994654708e-12, 1.47690863562226e-12, 1.552631311720656e-12, 1.632236368634829e-12, 1.715922859989149e-12, 1.803900045082437e-12, 1.896387912142291e-12, 1.993617728407108e-12, 2.095832618411197e-12, 2.20328817191913e-12, 2.31625308302931e-12, 2.435009822044922e-12, 2.559855341792266e-12, 2.691101820152497e-12, 2.829077440663638e-12, 2.974127213144608e-12, 3.126613836393291e-12, 3.286918605115798e-12, 3.455442363354597e-12, 3.632606506799666e-12, 3.818854036488877e-12, 4.014650666532296e-12, 4.220485988630375e-12, 4.436874696297674e-12, 4.66435787185351e-12, 4.903504339397404e-12, 
5.154912087152504e-12, 5.419209762733521e-12, 5.697058245077927e-12, 5.989152296971137e-12, 6.296222302297668e-12, 6.619036092362178e-12, 6.958400865847193e-12, 7.315165207207975e-12, 7.690221208551873e-12, 8.08450670030745e-12, 8.499007596261291e-12, 8.93476035882621e-12, 9.392854590704677e-12, 9.874435759428535e-12, 1.038070806158694e-11, 1.091293743390467e-11, 1.147245471869991e-11, 1.206065899163612e-11, 1.267902106008954e-11, 1.332908714087912e-11, 1.401248272655521e-11, 1.473091664991445e-11, 1.548618535690309e-11, 1.628017739859464e-11, 1.71148781534714e-11, 1.79923747918195e-11, 1.891486149464985e-11, 1.988464494019352e-11, 2.090415007169266e-11, 2.197592616090553e-11, 2.310265318248965e-11, 2.428714851520057e-11, 2.553237398666074e-11, 2.684144327931697e-11, 2.821762971609915e-11, 2.966437444525215e-11, 3.118529504480403e-11, 3.278419456818407e-11, 3.4465071053612e-11, 3.623212752103047e-11, 3.808978248158125e-11, 4.004268098590133e-11, 4.209570623886135e-11, 4.425399180979356e-11, 4.65229344687321e-11, 4.890820768076618e-11, 5.141577579224605e-11, 5.405190894430817e-11, 5.682319875101672e-11, 5.973657478131269e-11, 6.279932188598923e-11, 6.601909841301265e-11, 6.940395535673149e-11, 7.296235648886077e-11, 7.670319952156464e-11, 8.063583835556106e-11, 8.477010646887263e-11, 8.911634150469803e-11, 9.368541111989285e-11, 9.848874015867372e-11, 1.035383392195013e-10, 1.08846834686561e-10, 1.144275003009253e-10, 1.202942903503473e-10, 1.264618745606511e-10, 1.329456747759714e-10, 1.39761903519548e-10, 1.469276045314777e-10, 1.54460695384798e-10, 1.623800122864299e-10, 1.707053571750087e-10, 1.794575472333543e-10, 1.88658466939362e-10, 1.983311227854755e-10, 2.084997008035214e-10, 2.191896270387542e-10, 2.304276311242963e-10, 2.422418131149128e-10, 2.546617137472418e-10, 2.677183883021069e-10, 2.814444842536091e-10, 2.95874322899119e-10, 3.110439851742406e-10, 3.269914018673333e-10, 3.437564484590899e-10, 3.613810448243136e-10, 3.799092600451504e-10, 
3.99387422597793e-10, 4.198642361881823e-10, 4.413909015262362e-10, 4.640212443431007e-10, 4.878118499714513e-10, 5.128222048252681e-10, 5.391148451328576e-10, 5.667555132948596e-10, 5.958133222582082e-10, 6.263609283169414e-10, 6.584747127718307e-10, 6.922349729030425e-10, 7.277261227331449e-10, 7.650369040824369e-10, 8.04260608444173e-10, 8.454953102343377e-10, 8.888441119990907e-10, 9.34415402192828e-10, 9.823231261712848e-10, 1.032687071077103e-09, 1.085633165329994e-09, 1.141293793470202e-09, 1.199808127142235e-09, 1.261322473046292e-09, 1.325990638727122e-09, 1.393974317114691e-09, 1.465443490777891e-09, 1.54057685690174e-09, 1.619562274050417e-09, 1.702597231832824e-09, 1.789889344644613e-09, 1.881656870720915e-09, 1.978129257797e-09, 2.079547716740925e-09, 2.186165824591816e-09, 2.298250158511113e-09, 2.416080962231285e-09, 2.539952846667719e-09, 2.670175526444886e-09, 2.807074594177622e-09, 2.950992334442642e-09, 3.102288579474728e-09, 3.261341608726044e-09, 3.428549094536922e-09, 3.604329096281445e-09, 3.789121105472292e-09, 3.98338714443686e-09, 4.187612921310084e-09, 4.402309044230562e-09, 4.628012297774192e-09, 4.865286984814958e-09, 5.11472633716633e-09, 5.376953998528031e-09, 5.652625583444045e-09, 5.942430316167335e-09, 6.247092753526217e-09, 6.567374596097768e-09, 6.904076592213192e-09, 7.258040539553128e-09, 7.630151389333691e-09, 8.021339458340556e-09, 8.432582754338131e-09, 8.864909420663276e-09, 9.319400306111322e-09, 9.79719166501841e-09, 1.029947800256495e-08, 1.082751505553818e-08, 1.138262292684535e-08, 1.196618940209112e-08, 1.257967338903725e-08, 1.322460859304769e-08, 1.390260734308052e-08, 1.461536460279971e-08, 1.536466223111121e-08, 1.61523734213803e-08, 1.698046738671863e-08, 1.785101428016226e-08, 1.876619037740723e-08, 1.972828348930948e-08, 2.073969872086748e-08, 2.180296443659526e-08, 2.292073861333021e-08, 2.409581547090275e-08, 2.533113243878256e-08, 2.662977752876697e-08, 2.799499702419236e-08, 2.943020359554436e-08, 
3.093898484023693e-08, 3.252511224514334e-08, 3.419255057059979e-08, 3.594546787814141e-08, 3.778824572410281e-08, 3.972549040354333e-08, 4.17620440955678e-08, 4.390299736963235e-08, 4.615370157300699e-08, 4.851978228855661e-08, 5.100715349374719e-08, 5.362203231954854e-08, 5.637095438957116e-08, 5.926079038881075e-08, 6.229876298719311e-08, 6.549246521707315e-08, 6.884987903001477e-08, 7.23793955954224e-08, 7.608983596287542e-08, 7.999047340938168e-08, 8.409105625087607e-08, 8.84018325131865e-08, 9.293357519035652e-08, 9.769760952411896e-08, 1.027058410763688e-07, 1.079707852619522e-07, 1.135055990825673e-07, 1.19324113604629e-07, 1.254408686348963e-07, 1.318711489141531e-07, 1.386310224408222e-07, 1.457373804783979e-07, 1.532079797369722e-07, 1.610614867517515e-07, 1.693175244416644e-07, 1.77996721118764e-07, 1.871207618186106e-07, 1.967124427920729e-07, 2.067957279806376e-07, 2.173958090952736e-07, 2.285391684621387e-07, 2.402536449766521e-07, 2.525685036443408e-07, 2.655145089002048e-07, 2.791240006510276e-07, 2.934309754125021e-07, 3.084711717645451e-07, 3.242821580646927e-07, 3.409034275047145e-07, 3.583764957621989e-07, 3.767450049382975e-07, 3.960548324786971e-07, 4.163542053886171e-07, 4.376938205417976e-07, 4.601269708980277e-07, 4.837096790214877e-07, 5.085008360708562e-07, 5.345623491077053e-07, 5.619592950243196e-07, 5.907600828370516e-07, 6.210366247875989e-07, 6.528645145747828e-07, 6.863232165630145e-07, 7.214962633507835e-07, 7.584714642881274e-07, 7.973411237592823e-07, 8.382022713945219e-07, 8.811569040588096e-07, 9.263122392237256e-07, 9.737809825412614e-07, 1.023681608350283e-06, 1.076138654605902e-06, 1.131283033225959e-06, 1.189252355239236e-06, 1.250191274410477e-06, 1.314251846404739e-06, 1.381593907257763e-06, 1.452385472081339e-06, 1.526803151431705e-06, 1.605032592061999e-06, 1.687268937979615e-06, 1.773717315693781e-06, 1.864593345275301e-06, 1.960123674687838e-06, 2.060546545072664e-06, 2.166112381649133e-06, 2.277084416808132e-06, 
2.393739343279357e-06, 2.516368002802531e-06, 2.645276107542321e-06, 2.780784999722431e-06, 2.923232449681732e-06, 3.072973493957858e-06, 3.230381317351545e-06, 3.395848178396108e-06, 3.569786382930386e-06, 3.752629307222803e-06, 3.944832472123472e-06, 4.146874673491834e-06, 4.359259168892154e-06, 4.582514924845684e-06, 4.817197927966909e-06, 5.06389256264064e-06, 5.323213058120312e-06, 5.595805009803248e-06, 5.882346977989096e-06, 6.183552166967797e-06, 6.500170189554283e-06, 6.832988922876773e-06, 7.18283645548904e-06, 7.550583135968887e-06, 7.937143723489934e-06, 8.34347964870913e-06, 8.77060138843878e-06, 9.219570961768619e-06, 9.691504551561744e-06, 1.018757525861886e-05, 1.070901599727935e-05, 1.125712253480207e-05, 1.18332566870207e-05, 1.243884967399924e-05, 1.307540564634598e-05, 1.374450538949954e-05, 1.444781021509068e-05, 1.51870660492328e-05, 1.596410772700258e-05, 1.678086350374535e-05, 1.763935979403462e-05, 1.854172615029619e-05, 1.949020049087596e-05, 2.048713459326636e-05, 2.153499986340725e-05, 2.263639339504587e-05, 2.379404433473337e-05, 2.501082056678901e-05, 2.628973573381412e-05, 2.763395661136525e-05, 2.904681085131193e-05, 3.053179511514007e-05, 3.209258361520818e-05, 3.373303708387217e-05, 3.545721219219763e-05, 3.726937144029828e-05, 3.917399354349384e-05, 4.117578433679148e-05, 4.327968822565247e-05, 4.54909002083829e-05, 4.781487849810677e-05, 5.025735777526914e-05, 5.282436310068337e-05, 5.552222452090698e-05, 5.835759240238548e-05, 6.133745352583204e-05, 6.4469147982808e-05, 6.776038690936898e-05, 7.121927110195468e-05, 7.485431055350293e-05, 7.867444495896923e-05, 8.268906523494925e-05, 8.690803610311747e-05, 9.134171978906889e-05, 9.600100089065244e-05, 0.0001008973124716756, 0.0001060426634402384, 0.0001114496672747996, 0.0001171315721599382, 0.000123102292601493, 0.0001293764425918552, 0.0001359693703984699, 0.0001428971950526035, 0.0001501768446210466, 0.0001578260963434895, 0.0001658636187240688, 0.0001743090156711587, 
0.0001831828727797676, 0.0001925068058579651, 0.0002023035118031834, 0.0002125968219377053, 0.000223411757918001, 0.000234774590338226, 0.0002467129001511002, 0.0002592556430395789, 0.0002724332168719977, 0.0002862775323843807, 0.0003008220872377367, 0.0003161020436028844, 0.0003321543094352382, 0.0003490176236052522, 0.0003667326450600159, 0.0003853420461950001, 0.0004048906106283895, 0.0004254253355701873, 0.0004469955389927751, 0.0004696529718151254, 0.000493451935320041, 0.0005184494040353563, 0.0005447051543144037, 0.0005722818988673109, 0.0006012454274946983, 0.0006316647542934707, 0.0006636122716085825, 0.0006971639110167922, 0.0007323993116391391, 0.0007694019960882235, 0.0008082595543652878, 0.0008490638360389572, 0.0008919111510397208, 0.0009369024794230661, 0.0009841436904601396, 0.001033745771429859, 0.001085825066493323, 0.001140503526048068, 0.001197908966965465, 0.001258175344131595, 0.001321443033717407, 0.001387859128618439, 0.001457577746516163, 0.001530760351019047, 0.001607576086357012, 0.00168820212610872, 0.001772824036452815, 0.001861636154441034, 0.001954841981802874, 0.002052654594792494, 0.002155297070602554, 0.002263002930868145, 0.002376016602792355, 0.002494593898427442, 0.002619002512644328, 0.002749522540326372, 0.002886447013318025, 0.003030082457657207, 0.003180749471613479, 0.003338783325041092, 0.003504534580551855, 0.003678369736989011, 0.003860671895673292, 0.004051841449863196, 0.004252296797850689, 0.004462475080080313, 0.004682832940646727, 0.004913847313481357, 0.00515601623349851, 0.005409859672913716, 0.005675920402890499, 0.005954764880606209, 0.006246984161754135, 0.006553194838412605, 0.006874040002127077, 0.007210190231946017, 0.007562344607039479, 0.007931231743411307, 0.008317610854076724, 0.008722272831931787, 0.009146041354381544, 0.009589774008616578, 0.01005436343623659, 0.01054073849571531, 0.01104986544097156, 0.01158274911407184, 0.01214043414982453, 0.0127240061897426, 0.01333459310254605, 
0.01397336620804531, 0.01464154150089458, 0.01534038087032441, 0.01607119331155707, 0.01683533612417528, 0.01763421609224895, 0.01846929064053531, 0.01934206896053742, 0.02025411309965134, 0.02120703900603733, 0.02220251752122478, 0.0232422753117951, 0.02432809573078717, 0.02546181959873225, 0.026645345893447, 0.0278806323368982, 0.02916969586660191, 0.03051461297811879, 0.03191751992428569, 0.03338061275584449, 0.03490614718712708, 0.03649643826941312, 0.03815385985349604, 0.03988084382189094, 0.04167987906997549, 0.04355351021419578, 0.04550433600428732, 0.04753500741525747, 0.04964822539367343, 0.05184673823158249, 0.05413333854018483, 0.05651085979418755, 0.05898217241659138, 0.06155017937253486, 0.06421781123972324, 0.06698802072194977, 0.06986377657126752, 0.07284805688351335, 0.07594384173115677, 0.07915410509683765, 0.08248180607052032, 0.08592987927293742, 0.08950122446795065, 0.09319869532666376, 0.09702508730659626, 0.1009831246100172, 0.1050754461866756, 0.1093045907476861, 0.1136729807592902, 0.1181829053876348, 0.1228365023686705, 0.127635738780788, 0.1325823907019572, 0.1376780217379586, 0.1429239604138363, 0.1483212764270389, 0.1538707557678922, 0.1595728747211013, 0.1654277727710187, 0.1714352244434232, 0.1775946101276418, 0.1839048859350348, 0.1903645526631804, 0.196971623949645, 0.2037235937149165, 0.2106174030110826, 0.2176494064110481, 0.2248153380925223, 0.2321102777917479, 0.2395286168237756, 0.2470640243891489, 0.2547094144109476, 0.2624569131711511, 0.2702978280412112, 0.278222617628208, 0.2862208636850054, 0.2942812451600657, 0.3023915147897437, 0.3105384786627713, 0.3187079792126869, 0.3268848821189578, 0.3350530676208759, 0.3431954267694572, 0.3512938631611184, 0.3593293007119454, 0.3672816980424978, 0.3751300700493669, 0.3828525172403918, 0.3904262634047914, 0.3978277021764172, 0.4050324530271944, 0.4120154271974928, 0.4187509040297882, 0.4252126181206802, 0.4313738576430671, 0.4372075741144038, 0.4426865037975506, 0.447783300817165, 
0.4524706819564286, 0.4567215829656485, 0.4605093260659487, 0.4638077981678712, 0.4665916391466899, 0.4688364393244063, 0.4705189451037817, 0.4716172714840594, 0.4721111199632088, 0.4719820001001707, 0.4712134527758323, 0.4697912729569555, 0.4677037295373052, 0.4649417796096232, 0.4614992743162527, 0.4573731532410737, 0.4525636241473398, 0.4470743247417152, 0.4409124630612678, 0.4340889330444168, 0.4266184018656233, 0.4185193656935847, 0.4098141706794194, 0.4005289961997315, 0.390693797672807, 0.3803422066359932, 0.3695113862180381, 0.3582418406581861, 0.3465771781077746, 0.3345638265897894, 0.3222507036731106, 0.3096888411229067, 0.2969309664937704, 0.2840310443106149, 0.2710437801026057, 0.258024091081844, 0.2450265476531832, 0.2321047901637634, 0.2193109253103854, 0.2066949063810048, 0.1943039009790951, 0.182181649040367, 0.1703678127851553, 0.1588973187579875, 0.1477996903103357, 0.1370983668308416, 0.1268100037991338, 0.1169437454502174, 0.107500459644424, 0.09847192264648949, 0.08983994017862382, 0.0815847235444892, 0.07371530010502347, 0.0662468847281279, 0.05919220579228868, 0.05256136437167314, 0.04636171352155616, 0.04059776042796659, 0.03527109410148388, 0.0303803411613931, 0.02592115207090229, 0.02188621994563422, 0.01826533376640493, 0.01504546748505442, 0.01221090612191267, 0.009743409520465929, 0.00762241395541885, 0.005825271293054768, 0.004327524887466239, 0.003103220874188906, 0.002125253003483651, 0.001365736097954984, 0.0007964024142110837, 0.0003890338867149623, 0.0001159269776722649, -4.964634759208053e-05, -0.0001329544057030452, -0.0001573155398869597, -0.0001436032936786291, -0.000109752309762845, -7.026991308284325e-05, -3.575871131572539e-05, -1.245587356574207e-05, -1.795335764942507e-06, -1.110223024625157e-16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 2 + }, + { + "i": 2, + "radial_function": [ + 2.668319347193442e-08, 2.735868171519101e-08, 2.805127002394573e-08, 2.876139128837691e-08, 2.948948935732807e-08, 3.023601931572727e-08, 3.100144776903015e-08, 3.178625313486332e-08, 3.25909259420506e-08, 3.341596913720962e-08, 3.426189839910931e-08, 3.512924246098586e-08, 3.601854344101789e-08, 3.693035718116781e-08, 3.786525359460104e-08, 3.882381702189964e-08, 3.980664659629415e-08, 4.08143566181412e-08, 4.18475769388806e-08, 
4.290695335471308e-08, 4.399314801024322e-08, 4.510683981234127e-08, 4.624872485448155e-08, 4.741951685182317e-08, 4.861994758730503e-08, 4.985076736903313e-08, 5.111274549924749e-08, 5.240667075516045e-08, 5.373335188196747e-08, 5.50936180983392e-08, 5.648831961470886e-08, 5.791832816468144e-08, 5.938453754989464e-08, 6.088786419867336e-08, 6.242924773882709e-08, 6.400965158494628e-08, 6.563006354056776e-08, 6.7291496415583e-08, 6.899498865927647e-08, 7.07416050093898e-08, 7.253243715761589e-08, 7.436860443194129e-08, 7.62512544962615e-08, 7.818156406770709e-08, 8.016073965212968e-08, 8.21900182982052e-08, 8.42706683706291e-08, 8.640399034288409e-08, 8.859131761007687e-08, 9.083401732235262e-08, 9.313349123940603e-08, 9.549117660662566e-08, 9.790854705341738e-08, 1.003871135142687e-07, 1.02928425173131e-07, 1.055340704317068e-07, 1.082056779022513e-07, 1.109449174255058e-07, 1.137535011143999e-07, 1.166331844241765e-07, 1.195857672496046e-07, 1.226130950499704e-07, 1.257170600025463e-07, 1.288996021852608e-07, 1.321627107893073e-07, 1.355084253624498e-07, 1.389388370838033e-07, 1.424560900708858e-07, 1.460623827197572e-07, 1.497599690790856e-07, 1.535511602589956e-07, 1.57438325875584e-07, 1.614238955320022e-07, 1.655103603370322e-07, 1.697002744621072e-07, 1.739962567377444e-07, 1.784009922903952e-07, 1.829172342207306e-07, 1.875478053244125e-07, 1.922955998564271e-07, 1.971635853400807e-07, 2.021548044217919e-07, 2.072723767728374e-07, 2.125195010392395e-07, 2.178994568410172e-07, 2.234156068220453e-07, 2.290713987518077e-07, 2.348703676803565e-07, 2.408161381478225e-07, 2.469124264498616e-07, 2.531630429604476e-07, 2.595718945134697e-07, 2.661429868446179e-07, 2.728804270950851e-07, 2.797884263786525e-07, 2.868713024137554e-07, 2.941334822221851e-07, 3.015795048961047e-07, 3.092140244351133e-07, 3.170418126551314e-07, 3.250677621709195e-07, 3.332968894541054e-07, 3.4173433796862e-07, 3.503853813855094e-07, 3.592554268791304e-07, 3.683500185067831e-07, 
3.776748406739052e-07, 3.872357216869839e-07, 3.970386373964089e-07, 4.07089714931548e-07, 4.173952365303679e-07, 4.279616434660078e-07, 4.38795540072752e-07, 4.499036978739172e-07, 4.612930598142413e-07, 4.729707445994064e-07, 4.849440511454247e-07, 4.972204631406539e-07, 5.098076537233015e-07, 5.227134902773406e-07, 5.35946039349827e-07, 5.495135716927054e-07, 5.634245674322409e-07, 5.776877213693174e-07, 5.92311948413913e-07, 6.073063891571394e-07, 6.226804155843462e-07, 6.38443636932844e-07, 6.546059056979168e-07, 6.711773237908787e-07, 6.881682488530066e-07, 7.055893007293243e-07, 7.234513681062555e-07, 7.417656153173163e-07, 7.605434893210876e-07, 7.797967268558257e-07, 7.99537361775199e-07, 8.197777325697225e-07, 8.405304900785907e-07, 8.61808605396742e-07, 8.836253779820697e-07, 9.059944439678834e-07, 9.289297846857814e-07, 9.524457354042816e-07, 9.765569942886705e-07, 1.001278631587655e-06, 1.026626099052579e-06, 1.052615239595078e-06, 1.079262297189215e-06, 1.106583927024281e-06, 1.134597205914605e-06, 1.163319642972885e-06, 1.192769190553708e-06, 1.222964255474098e-06, 1.253923710518104e-06, 1.285666906232614e-06, 1.318213683021778e-06, 1.351584383547595e-06, 1.385799865444395e-06, 1.420881514355208e-06, 1.456851257298097e-06, 1.493731576370891e-06, 1.53154552280281e-06, 1.570316731361808e-06, 1.610069435126642e-06, 1.650828480632822e-06, 1.692619343402057e-06, 1.735468143864729e-06, 1.779401663685475e-06, 1.824447362502035e-06, 1.870633395087783e-06, 1.917988628948779e-06, 1.966542662366199e-06, 2.016325842895529e-06, 2.067369286334058e-06, 2.119704896168431e-06, 2.173365383514618e-06, 2.228384287562514e-06, 2.284795996538138e-06, 2.342635769196462e-06, 2.401939756858233e-06, 2.462745026004738e-06, 2.525089581444406e-06, 2.589012390065911e-06, 2.654553405192539e-06, 2.72175359155299e-06, 2.790654950884372e-06, 2.861300548183203e-06, 2.933734538620938e-06, 3.008002195140869e-06, 3.08414993675347e-06, 3.162225357548134e-06, 3.242277256439143e-06, 
3.324355667664702e-06, 3.408511892057964e-06, 3.494798529109546e-06, 3.583269509841784e-06, 3.673980130514973e-06, 3.766987087186895e-06, 3.862348511147147e-06, 3.960124005248296e-06, 4.060374681156856e-06, 4.163163197546965e-06, 4.268553799260999e-06, 4.376612357461385e-06, 4.487406410798656e-06, 4.601005207621747e-06, 4.717479749256531e-06, 4.836902834379937e-06, 4.95934910451727e-06, 5.084895090691022e-06, 5.21361926125066e-06, 5.345602070912872e-06, 5.480926011043199e-06, 5.619675661210398e-06, 5.761937742045583e-06, 5.907801169439518e-06, 6.057357110111494e-06, 6.210699038584857e-06, 6.367922795604629e-06, 6.529126648033628e-06, 6.694411350264817e-06, 6.863880207187817e-06, 7.037639138749291e-06, 7.21579674614739e-06, 7.398464379701422e-06, 7.585756208439635e-06, 7.777789291447981e-06, 7.97468365102486e-06, 8.176562347687475e-06, 8.383551557076315e-06, 8.595780648806458e-06, 8.813382267314172e-06, 9.036492414749922e-06, 9.265250535969351e-06, 9.499799605675055e-06, 9.740286217764192e-06, 9.986860676937032e-06, 1.023967709262418e-05, 1.049889347529095e-05, 1.076467183517881e-05, 1.103717828354624e-05, 1.131658313647123e-05, 1.160306102128122e-05, 1.189679098567632e-05, 1.219795660961412e-05, 1.25067461200262e-05, 1.282335250843745e-05, 1.314797365156223e-05, 1.348081243495226e-05, 1.382207687977359e-05, 1.417198027279184e-05, 1.453074129964684e-05, 1.489858418149976e-05, 1.52757388151383e-05, 1.566244091662714e-05, 1.605893216859357e-05, 1.646546037123991e-05, 1.68822795971775e-05, 1.730965035017841e-05, 1.774783972794417e-05, 1.819712158899333e-05, 1.865777672377151e-05, 1.91300930300913e-05, 1.961436569301111e-05, 2.011089736926541e-05, 2.061999837636142e-05, 2.114198688646022e-05, 2.167718912516333e-05, 2.22259395753289e-05, 2.278858118604425e-05, 2.33654655868859e-05, 2.395695330759995e-05, 2.456341400334045e-05, 2.518522668560614e-05, 2.582277995901937e-05, 2.647647226409546e-05, 2.714671212615311e-05, 2.783391841052201e-05, 2.85385205842063e-05, 
2.926095898416705e-05, 3.000168509239139e-05, 3.076116181791955e-05, 3.15398637860058e-05, 3.233827763459369e-05, 3.315690231828995e-05, 3.399624942002765e-05, 3.485684347061127e-05, 3.573922227634471e-05, 3.664393725494506e-05, 3.757155377995209e-05, 3.852265153384826e-05, 3.949782487010885e-05, 4.049768318440816e-05, 4.152285129521332e-05, 4.257396983400186e-05, 4.365169564534764e-05, 4.475670219712286e-05, 4.588968000107279e-05, 4.70513370440249e-05, 4.824239923000022e-05, 4.946361083350358e-05, 5.071573496427369e-05, 5.199955404378344e-05, 5.331587029378651e-05, 5.466550623721428e-05, 5.604930521173566e-05, 5.746813189629825e-05, 5.892287285097984e-05, 6.041443707048496e-05, 6.194375655163159e-05, 6.351178687518104e-05, 6.511950780237235e-05, 6.676792388653285e-05, 6.84580651001453e-05, 7.019098747776025e-05, 7.19677737751556e-05, 7.378953414515064e-05, 7.565740683049633e-05, 7.757255887427204e-05, 7.953618684822922e-05, 8.154951759953649e-05, 8.361380901638721e-05, 8.573035081294816e-05, 8.790046533413392e-05, 9.012550838070943e-05, 9.240687005522925e-05, 9.474597562934267e-05, 9.714428643299998e-05, 9.960330076611255e-05, 0.0001021245548332331, 0.0001047096237018329, 0.0001073601222847748, 0.0001100777063475863, 0.0001128640735411604, 0.0001157209644605238, 0.0001186501637303247, 0.0001216535011177187, 0.000124732852673337, 0.0001278901419010454, 0.0001311273409572187, 0.0001344464718802694, 0.0001378496078511965, 0.0001413388744859295, 0.0001449164511602688, 0.000148584572368242, 0.0001523455291147131, 0.0001562016703431079, 0.000160155404399136, 0.000164209200531412, 0.0001683655904299062, 0.0001726271698031673, 0.000176996599995297, 0.0001814766096436699, 0.0001860699963784205, 0.0001907796285647462, 0.0001956084470890975, 0.0002005594671903568, 0.0002056357803371326, 0.0002108405561523235, 0.0002161770443861372, 0.0002216485769387737, 0.0002272585699340211, 0.0002330105258450328, 0.0002389080356735937, 0.0002449547811842152, 0.0002511545371944196, 
0.0002575111739226317, 0.0002640286593951064, 0.0002707110619133685, 0.0002775625525836812, 0.0002845874079100783, 0.0002917900124525597, 0.0002991748615520647, 0.0003067465641238929, 0.0003145098455212765, 0.0003224695504708461, 0.0003306306460817894, 0.0003389982249305275, 0.0003475775082227896, 0.0003563738490350153, 0.000365392735637043, 0.0003746397948981164, 0.0003841207957782709, 0.0003938416529072164, 0.0004038084302528891, 0.0004140273448818972, 0.0004245047708141243, 0.0004352472429738398, 0.0004462614612396868, 0.0004575542945960029, 0.0004691327853879741, 0.0004810041536831825, 0.00049317580174218, 0.0005056553186007725, 0.0005184504847667661, 0.0005315692770340021, 0.0005450198734165519, 0.0005588106582060428, 0.0005729502271551314, 0.0005874473927902212, 0.0006023111898566017, 0.0006175508808992415, 0.0006331759619825743, 0.000649196168552666, 0.0006656214814452462, 0.0006824621330431776, 0.0006997286135869814, 0.0007174316776421874, 0.0007355823507272952, 0.0007541919361062708, 0.0007732720217495749, 0.0007928344874677984, 0.0008128915122221168, 0.0008334555816158194, 0.0008545394955713072, 0.0008761563761970404, 0.0008983196758490024, 0.0009210431853913896, 0.0009443410426613039, 0.0009682277411423542, 0.0009927181388521883, 0.001017827467449058, 0.001043571341562681, 0.001069965768354748, 0.001097027157314542, 0.001124772330295288, 0.00115321853179692, 0.001182383439501151, 0.001212285175064791, 0.001242942315177429, 0.001274373902889723, 0.001306599459218633, 0.00133963899503616, 0.001373513023248186, 0.001408242571270232, 0.001443849193807059, 0.001480354985943167, 0.001517782596551445, 0.001556155242027323, 0.001595496720355961, 0.001635831425520144, 0.00167718436225671, 0.001719581161169525, 0.00176304809420713, 0.001807612090513372, 0.001853300752659526, 0.001900142373266475, 0.001948165952025838, 0.001997401213128953, 0.002047878623112874, 0.002099629409132728, 0.002152685577669827, 0.002207079933685293, 0.002262846100228962, 
0.002320018538513563, 0.002378632568464406, 0.002438724389754835, 0.002500331103338086, 0.002563490733486154, 0.002628242250346594, 0.002694625593028315, 0.002762681693227517, 0.002832452499405278, 0.002903981001528254, 0.002977311256384276, 0.003052488413484768, 0.003129558741565948, 0.003208569655701175, 0.003289569745036724, 0.003372608801163563, 0.003457737847137823, 0.003545009167162757, 0.003634476336945211, 0.003726194254739657, 0.003820219173093043, 0.003916608731303832, 0.004015421988608573, 0.004116719458109729, 0.004220563141458273, 0.004327016564304822, 0.004436144812533168, 0.004548014569289901, 0.004662694152824209, 0.004780253555151634, 0.004900764481555752, 0.005024300390941773, 0.005150936537055766, 0.00528075001058362, 0.00541381978214328, 0.005550226746184099, 0.005690053765806847, 0.005833385718517678, 0.005980309542929524, 0.006130914286423757, 0.006285291153785062, 0.006443533556821981, 0.006605737164985304, 0.006771999956996311, 0.006942422273496181, 0.007117106870727657, 0.007296158975259532, 0.007479686339763739, 0.007667799299854724, 0.007860610831999522, 0.008058236612506663, 0.00826079507760114, 0.008468407484591482, 0.008681197974134674, 0.008899293633602942, 0.00912282456155583, 0.009351923933319479, 0.009586728067673755, 0.009827376494646713, 0.01007401202441399, 0.01032678081729932, 0.0105858324548706, 0.01085132001212379, 0.01112340013074534, 0.0114022330934412, 0.01168798289931837, 0.01198081734030248, 0.01228090807857189, 0.01258843072498641, 0.01290356491848531, 0.01322649440642596, 0.01355740712583144, 0.0138964952855109, 0.01424395544901344, 0.01459998861837117, 0.01496480031858314, 0.01533860068278684, 0.01572160453805864, 0.01611403149177957, 0.01651610601849655, 0.01692805754720339, 0.01735012054895913, 0.01778253462475413, 0.01822554459352775, 0.01867940058023259, 0.01914435810383261, 0.01962067816511369, 0.020108627334175, 0.02060847783746111, 0.0211205076441828, 0.02164500055196424, 0.0221822462715426, 
0.02273254051033267, 0.02329618505465722, 0.02387348785042898, 0.02446476308205555, 0.0250703312493234, 0.02569051924199958, 0.02632566041187395, 0.02697609464194472, 0.02764216841243169, 0.0283242348632809, 0.02902265385280195, 0.02973779201205797, 0.03047002279460262, 0.0312197265211337, 0.03198729041860669, 0.03277310865332174, 0.03357758235747031, 0.03440111964859391, 0.03524413564137577, 0.03610705245115212, 0.03699029918849069, 0.03789431194414972, 0.03881953376368647, 0.03976641461094426, 0.04073541131960227, 0.041726987531924, 0.04274161362379427, 0.04377976661507994, 0.04484193006429818, 0.04592859394651855, 0.04704025451336548, 0.04817741413392772, 0.04934058111531487, 0.05053026950153419, 0.05174699884929056, 0.0529912939792376, 0.05426368470113316, 0.05556470551127009, 0.05689489526047097, 0.05825479679084898, 0.05964495653944509, 0.06106592410676136, 0.06251825178810969, 0.06400249406559655, 0.06551920705846109, 0.06706894792937244, 0.06865227424418799, 0.07026974328255263, 0.07192191129660713, 0.073609332714951, 0.07533255928888057, 0.07709213917780007, 0.0788886159705717, 0.08072252763944114, 0.08259440542304317, 0.08450477263485505, 0.08645414339333504, 0.08844302126984352, 0.09047189785031225, 0.09254125120649304, 0.09465154427248207, 0.09680322312209336, 0.09899671514252294, 0.1012324270996291, 0.1035107430900409, 0.1058320223751978, 0.108196597092334, 0.1106047698373288, 0.1130568111142818, 0.1155529566466115, 0.1180934045444408, 0.1206783123230215, 0.1233077937669541, 0.1259819156350004, 0.1287006942003541, 0.131464091621335, 0.1342720121376221, 0.1371242980873224, 0.1400207257404145, 0.1429610009443968, 0.1459447545783195, 0.1489715378118116, 0.1520408171661968, 0.155151969375378, 0.1583042760448362, 0.1614969181078503, 0.1647289700789254, 0.1679993941053995, 0.1713070338193187, 0.1746506079929276, 0.178028704002518, 0.181439771106953, 0.1848821135489102, 0.1883538834888163, 0.1918530737835653, 0.1953775106244363, 0.1989248460512027, 
0.2024925503621966, 0.2060779044431599, 0.2096779920410266, 0.2132896920123583, 0.2169096705800706, 0.2205343736362474, 0.2241600191333677, 0.2277825896110929, 0.2313978249109116, 0.2350012151364599, 0.2385879939231497, 0.2421531320869275, 0.2456913317285045, 0.2491970208762155, 0.2526643487578452, 0.2560871817991763, 0.2594591004547424, 0.2627733969842035, 0.2660230742958781, 0.2692008459872466, 0.2722991377205431, 0.2753100900798867, 0.2782255630646127, 0.2810371423814544, 0.2837361477059183, 0.2863136430903533, 0.2887604497027879, 0.2910671610863099, 0.2932241611334455, 0.295221644973411, 0.2970496429719681, 0.2986980480436798, 0.3001566464742715, 0.3014151524462103, 0.3024632464531924, 0.3032906177784784, 0.3038870111975895, 0.3042422780472377, 0.3043464317790405, 0.304189708088047, 0.3037626296718084, 0.3030560756351307, 0.3020613555081464, 0.3007702877903941, 0.2991752828706004, 0.2972694301002899, 0.2950465887186818, 0.2925014822360926, 0.289629795782876, 0.2864282758204393, 0.2828948314899503, 0.2790286367428725, 0.2748302322556078, 0.2703016259786196, 0.2654463910089971, 0.2602697593054454, 0.2547787095873004, 0.248982047575998, 0.2428904765505259, 0.2365166560002082, 0.2298752459718835, 0.2229829345276808, 0.2158584455585398, 0.2085225240423715, 0.2009978956999131, 0.1933091978925517, 0.1854828785317736, 0.1775470597375668, 0.1695313630019517, 0.1614666926932005, 0.1533849748867259, 0.1453188487406316, 0.1373013079588935, 0.1293652903145764, 0.121543213750722, 0.1138664582490228, 0.106364793466483, 0.09906575309760929, 0.0919939580322879, 0.08517039165306518, 0.07861163205344551, 0.07232904756041533, 0.06632796370527919, 0.06060681169729379, 0.05515627049877065, 0.04996420075101188, 0.04503493592221131, 0.04037628830565626, 0.0359943390976098, 0.03189337031630107, 0.02807581289404204, 0.02454221260818712, 0.02129121538890238, 0.01831957337426543, 0.01562217287497714, 0.01319208516138803, 0.01102064069523701, 0.009097527098942793, 0.007410910788998581, 
0.005947581800879786, 0.004693120905533976, 0.003632087668113404, 0.002748227634946976, 0.002024696362768419, 0.001444297531536132, 0.0009897307852349546, 0.0006438444749196914, 0.0003898965165846091, 0.0002118186115419862, 9.446471719082306e-05, 2.385271278418366e-05, -1.261182739609179e-05, -2.593392874142286e-05, -2.535556328270294e-05, -1.821596352841182e-05, -9.851983898107042e-06, -3.5447091586982e-06, -5.186480378782665e-07, -1.387778780781446e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 3 + }, + { + "i": 2, + "radial_function": [ + 2.764048976020664e-15, 2.905764792604247e-15, 3.054746534099845e-15, 3.211366732414424e-15, 3.376017019572e-15, 3.549109106997333e-15, 3.731075815008655e-15, 3.922372155093599e-15, 4.123476467674497e-15, 4.334891618208297e-15, 4.557146254611614e-15, 4.790796129155485e-15, 5.036425488135e-15, 5.294648532788845e-15, 5.566110955121814e-15, 5.851491552470409e-15, 6.151503924849167e-15, 6.466898259321688e-15, 6.798463205858193e-15, 7.147027849370573e-15, 7.513463782855443e-15, 7.898687286829777e-15, 8.303661620508374e-15, 8.729399430452527e-15, 9.176965282712776e-15, 9.647478324797041e-15, 1.014211508412111e-14, 1.066211240993842e-14, 1.120877056610572e-14, 1.178345648241851e-14, 1.238760717264527e-14, 1.302273332780862e-14, 1.369042309369742e-14, 1.439234604205614e-14, 1.513025734538142e-14, 1.590600216576456e-14, 1.672152026875502e-14, 1.757885087378145e-14, 1.848013775325894e-14, 1.942763459313322e-14, 2.042371062826498e-14, 2.147085656674715e-14, 2.257169081796762e-14, 2.372896603999159e-14, 2.494557602263542e-14, 2.622456292344206e-14, 2.756912487465365e-14, 2.898262398020106e-14, 3.046859472270756e-14, 3.203075280152888e-14, 3.367300442392803e-14, 3.539945607261927e-14, 3.721442477410395e-14, 3.912244889347463e-14, 4.112829948268082e-14, 4.323699221063084e-14, 4.545379990496378e-14, 4.77842657368508e-14, 5.023421708179448e-14, 5.280978009108716e-14, 5.551739501036096e-14, 5.836383228353786e-14, 6.135620948244506e-14, 6.450200910442903e-14, 6.78090972824731e-14, 7.128574345459975e-14, 7.494064104174568e-14, 7.87829291858118e-14, 8.282221560224505e-14, 8.706860060429775e-14, 9.15327023590317e-14, 9.62256834382268e-14, 1.011592787305804e-13, 1.063458247849932e-13, 1.117982906583179e-13, 1.175303103446987e-13, 
1.235562168676003e-13, 1.298910781197673e-13, 1.365507345407352e-13, 1.435518387261071e-13, 1.509118970676335e-13, 1.586493135282248e-13, 1.66783435661352e-13, 1.75334602989905e-13, 1.843241978654892e-13, 1.937746989353208e-13, 2.037097373504292e-13, 2.141541558557077e-13, 2.251340709095671e-13, 2.366769379885311e-13, 2.488116202400526e-13, 2.615684606552365e-13, 2.74979357941923e-13, 2.89077846287858e-13, 3.038991792134041e-13, 3.194804177234495e-13, 3.35860522978962e-13, 3.530804537198945e-13, 3.711832686830594e-13, 3.902142342710676e-13, 4.102209377415455e-13, 4.312534061996821e-13, 4.533642316916291e-13, 4.766087027115564e-13, 5.010449424512068e-13, 5.267340541376132e-13, 5.537402738224361e-13, 5.821311310049374e-13, 6.119776174902467e-13, 6.433543649051488e-13, 6.763398313152498e-13, 7.11016497410191e-13, 7.474710727474477e-13, 7.857947125704263e-13, 8.260832457430335e-13, 8.684374143706241e-13, 9.129631257065501e-13, 9.597717169741638e-13, 1.008980233766476e-12, 1.060711722719615e-12, 1.115095539191871e-12, 1.172267670717743e-12, 1.232371077045723e-12, 1.295556047610102e-12, 1.361980577330671e-12, 1.431810761679939e-12, 1.505221212005807e-12, 1.582395492148152e-12, 1.663526577441088e-12, 1.748817337248663e-12, 1.838481042240482e-12, 1.932741897675798e-12, 2.031835604029452e-12, 2.136009946361558e-12, 2.2455254139046e-12, 2.360655851417188e-12, 2.481689143933236e-12, 2.608927936618723e-12, 2.742690391535982e-12, 2.883310983207906e-12, 3.03114133497115e-12, 3.186551098209876e-12, 3.349928876668298e-12, 3.521683198153384e-12, 3.702243536057413e-12, 3.89206138325456e-12, 4.091611381056913e-12, 4.301392506052722e-12, 4.521929317794556e-12, 4.75377327045723e-12, 4.997504091745051e-12, 5.253731232496562e-12, 5.523095390611233e-12, 5.806270113108703e-12, 6.103963480326539e-12, 6.416919876467422e-12, 6.745921850923431e-12, 7.091792075031155e-12, 7.455395399150549e-12, 7.8376410152113e-12, 8.239484730133545e-12, 8.661931355808161e-12, 9.106037221612063e-12, 
9.572912815741154e-12, 1.006372556196557e-11, 1.057970273874952e-11, 1.112213454803626e-11, 1.169237734136966e-11, 1.22918570114205e-11, 1.292207255739724e-11, 1.358459983325557e-11, 1.428109548808057e-11, 1.501330110849201e-11, 1.578304757343202e-11, 1.659225963222353e-11, 1.744296071734538e-11, 1.833727800396055e-11, 1.927744772884501e-11, 2.026582078201942e-11, 2.13048685850639e-11, 2.239718927081296e-11, 2.354551417988487e-11, 2.475271469028596e-11, 2.602180939716922e-11, 2.735597166069832e-11, 2.875853754088804e-11, 3.023301413926498e-11, 3.178308836820105e-11, 3.341263616984961e-11, 3.512573220773457e-11, 3.692666005522185e-11, 3.881992290635358e-11, 4.081025483581908e-11, 4.290263263622225e-11, 4.51022882622396e-11, 4.741472191278223e-11, 4.984571578387661e-11, 5.240134852664373e-11, 5.508801044653194e-11, 5.791241948180477e-11, 6.088163800123087e-11, 6.400309046298419e-11, 6.728458197889592e-11, 7.073431783048357e-11, 7.436092398554863e-11, 7.817346866663682e-11, 8.218148502529828e-11, 8.639499497882599e-11, 9.08245342690811e-11, 9.548117880605527e-11, 1.003765723620296e-10, 1.05522955685586e-10, 1.109331971082459e-10, 1.166208247202731e-10, 1.226000601960829e-10, 1.28885854353821e-10, 1.354939245380366e-10, 1.424407939188895e-10, 1.497438328061675e-10, 1.574213020813968e-10, 1.654923988566281e-10, 1.739773044740704e-10, 1.828972349665553e-10, 1.922744941050097e-10, 2.021325291655557e-10, 2.124959895556509e-10, 2.233907884458704e-10, 2.348441675613795e-10, 2.468847652951088e-10, 2.595426883129149e-10, 2.728495868297252e-10, 2.868387337449014e-10, 3.015451078346149e-10, 3.170054812092568e-10, 3.332585112545175e-10, 3.503448372859634e-10, 3.683071821588026e-10, 3.871904590867967e-10, 4.070418839374139e-10, 4.279110932839367e-10, 4.49850268509629e-10, 4.729142662742665e-10, 4.971607556691171e-10, 5.22650362403299e-10, 5.49446820381945e-10, 5.776171310550847e-10, 6.072317309356242e-10, 6.383646677051528e-10, 6.710937853478403e-10, 7.055009187752056e-10, 
7.416720984282689e-10, 7.796977653685682e-10, 8.19672997395677e-10, 8.616977467564872e-10, 9.058770900404353e-10, 9.52321490885335e-10, 1.001147076150507e-09, 1.052475926247496e-09, 1.106436380354149e-09, 1.163163357274915e-09, 1.222798692749382e-09, 1.285491494052216e-09, 1.351398512770755e-09, 1.420684536692078e-09, 1.493522801779023e-09, 1.570095425264864e-09, 1.650593860949151e-09, 1.735219377832631e-09, 1.82418356328757e-09, 1.917708852021046e-09, 2.016029082153222e-09, 2.119390079800442e-09, 2.228050273624091e-09, 2.342281340881145e-09, 2.462368886591001e-09, 2.588613157515853e-09, 2.721329792739031e-09, 2.860850612716885e-09, 3.007524448776229e-09, 3.161718015130174e-09, 3.323816825591424e-09, 3.494226157273925e-09, 3.673372063690913e-09, 3.861702439781e-09, 4.059688141523543e-09, 4.267824162940811e-09, 4.486630873428185e-09, 4.716655318503641e-09, 4.958472587226948e-09, 5.212687249704957e-09, 5.479934868274484e-09, 5.760883586138705e-09, 6.056235797425736e-09, 6.366729902842045e-09, 6.693142153529525e-09, 7.036288598731827e-09, 7.397027114009195e-09, 7.77625955197959e-09, 8.174934000342437e-09, 8.594047136791615e-09, 9.034646741673964e-09, 9.497834298113401e-09, 9.984767756549294e-09, 1.049666441787906e-08, 1.103480398712278e-08, 1.160053176194381e-08, 1.219526200533528e-08, 1.282048147061842e-08, 1.347775311912574e-08, 1.416872002975199e-08, 1.489510950311909e-08, 1.565873736987786e-08, 1.646151254662517e-08, 1.730544178748435e-08, 1.819263471085111e-08, 1.912530906624843e-08, 2.010579627542903e-08, 2.11365472584192e-08, 2.22201385493559e-08, 2.335927877721851e-08, 2.455681532115294e-08, 2.58157416258505e-08, 2.713920445069782e-08, 2.85305118395332e-08, 2.999314148316028e-08, 3.153074919290147e-08, 3.314717825175815e-08, 3.484646880377785e-08, 3.663286814426399e-08, 3.851084122781414e-08, 4.04850818036948e-08, 4.256052420712918e-08, 4.474235561199058e-08, 4.703602908668719e-08, 4.944727705113321e-08, 5.198212584175905e-08, 5.464691048046978e-08, 
5.744829069063728e-08, 6.039326753962233e-08, 6.348920075182491e-08, 6.674382723157377e-08, 7.016528036981986e-08, 7.37621102797461e-08, 7.754330526377485e-08, 8.15183141264923e-08, 8.569706986918698e-08, 9.009001448257129e-08, 9.470812486193141e-08, 9.956294051736918e-08, 1.046665920251631e-07, 1.100318315827558e-07, 1.156720648212314e-07, 1.216013839985057e-07, 1.278346035199711e-07, 1.343872967137686e-07, 1.412758347832516e-07, 1.485174275515931e-07, 1.561301665202853e-07, 1.641330700713924e-07, 1.72546130823331e-07, 1.813903653152166e-07, 1.906878669502508e-07, 2.004618607262824e-07, 2.107367612011351e-07, 2.215382335964393e-07, 2.328932575754508e-07, 2.448301948235528e-07, 2.573788594103963e-07, 2.70570592652983e-07, 2.84438340797227e-07, 2.990167373284321e-07, 3.143421895883682e-07, 3.304529691055743e-07, 3.473893074274927e-07, 3.651934961151092e-07, 3.839099922541981e-07, 4.035855294804664e-07, 4.242692342466113e-07, 4.460127480765084e-07, 4.688703565133968e-07, 4.928991246702009e-07, 5.181590386796874e-07, 5.447131557131719e-07, 5.72627760938327e-07, 6.019725324452827e-07, 6.328207151511066e-07, 6.652493026580268e-07, 6.993392294107514e-07, 7.35175572240402e-07, 7.72847761859906e-07, 8.124498056948092e-07, 8.54080521757614e-07, 8.978437846677268e-07, 9.438487841500279e-07, 9.92210296625238e-07, 1.043048970666817e-06, 1.096491627404773e-06, 1.152671575662292e-06, 1.211728943684769e-06, 1.2738110272305e-06, 1.339072656266096e-06, 1.40767657925884e-06, 1.479793868075945e-06, 1.555604343231362e-06, 1.635297019928117e-06, 1.719070578294097e-06, 1.8071338567066e-06, 1.899706370233778e-06, 1.997018855843615e-06, 2.099313845053835e-06, 2.206846265979562e-06, 2.319884075882103e-06, 2.438708926120206e-06, 2.563616860664026e-06, 2.694919050215294e-06, 2.832942564003771e-06, 2.978031180448721e-06, 3.1305462391707e-06, 3.290867536298554e-06, 3.459394265553246e-06, 3.636546006837615e-06, 3.822763764842608e-06, 4.018511061372066e-06, 4.224275082027681e-06, 
4.44056788160567e-06, 4.667927651126565e-06, 4.906920048114593e-06, 5.158139595338903e-06, 5.422211150155446e-06, 5.699791448193792e-06, 5.991570725539086e-06, 6.298274423072161e-06, 6.620664976880284e-06, 6.959543700093396e-06, 7.315752758921818e-06, 7.690177249921632e-06, 8.083747381026665e-06, 8.497440764327716e-06, 8.932284823802602e-06, 9.389359325024556e-06, 9.86979903264422e-06, 1.037479650231619e-05, 1.090560501337239e-05, 1.146354165001656e-05, 1.204999053654341e-05, 1.266640623772293e-05, 1.331431732879501e-05, 1.39953301469299e-05, 1.471113273001526e-05, 1.546349895492193e-05, 1.625429288338865e-05, 1.708547332702423e-05, 1.795909864080593e-05, 1.887733175863198e-05, 1.98424454803406e-05, 2.085682802562141e-05, 2.192298886507206e-05, 2.304356484444841e-05, 2.42213266153365e-05, 2.545918538789854e-05, 2.676020002167901e-05, 2.812758447068173e-05, 2.956471560076193e-05, 3.107514139730588e-05, 3.266258958230891e-05, 3.433097666172252e-05, 3.608441742316169e-05, 3.792723490688484e-05, 3.986397087223162e-05, 4.189939678554592e-05, 4.403852535314919e-05, 4.628662262701113e-05, 4.864922071024216e-05, 5.113213109340093e-05, 5.374145864986831e-05, 5.648361632337735e-05, 5.936534054245194e-05, 6.23937073944553e-05, 6.557614959773758e-05, 6.892047430999742e-05, 7.243488181261009e-05, 7.612798511354377e-05, 8.000883051311161e-05, 8.408691917705544e-05, 8.83722297681168e-05, 9.287524218326027e-05, 9.76069624514908e-05, 0.0001025789488461508, 0.000107803339270166, 0.0001132928799733494, 0.0001190609556658647, 0.0001251216210920575, 0.000131489634134585, 0.0001381804905199822, 0.0001452104601999174, 0.0001525966254871208, 0.0001603569210268806, 0.0001685101756887975, 0.0001770761564686286, 0.0001860756144912518, 0.0001955303332126244, 0.0002054631789197731, 0.0002158981536345179, 0.0002268604505303242, 0.0002383765119742915, 0.0002504740903155591, 0.0002631823115392688, 0.0002765317419183892, 0.0002905544577941848, 0.0003052841186242671, 0.000320756043443762, 
0.0003370072908873193, 0.0003540767429299433, 0.0003720051925051789, 0.0003908354351720211, 0.0004106123650007572, 0.0004313830748608644, 0.0004531969612968282, 0.0004761058341851338, 0.0005001640313740082, 0.0005254285385121975, 0.0005519591142826492, 0.0005798184212618789, 0.0006090721626345801, 0.0006397892249998551, 0.0006720418275131725, 0.0007059056776155106, 0.0007414601336080599, 0.0007787883743403327, 0.0008179775762842123, 0.0008591190982762773, 0.0009023086742172047, 0.0009476466140245174, 0.000995238013141052, 0.001045192970910563, 0.001097626818136803, 0.001152660354147883, 0.001210420093697987, 0.001271038524037686, 0.001334654372496267, 0.001401412884916685, 0.0014714661152945, 0.001544973226970348, 0.001622100805730043, 0.001703023185167433, 0.001787922784664591, 0.001876990460345227, 0.001970425869351425, 0.002068437847794299, 0.002171244802720092, 0.002279075118429958, 0.00239216757747852, 0.002510771796668939, 0.002635148678346542, 0.002765570877277376, 0.002902323283379953, 0.003045703520555112, 0.00319602246183471, 0.003353604761040735, 0.003518789401112656, 0.003691930259225392, 0.003873396688775982, 0.004063574118272907, 0.004262864667108131, 0.004471687778133847, 0.004690480866903351, 0.004919699987362425, 0.005159820513700192, 0.00541133783798481, 0.005674768083109979, 0.005950648830481286, 0.006239539861755366, 0.006542023913822344, 0.006858707446091933, 0.007190221418995465, 0.007537222082462079, 0.007900391772957116, 0.008280439717488208, 0.0086781028427865, 0.00909414658766076, 0.009529365716290266, 0.009984585129982019, 0.01046066067465069, 0.0109584799410048, 0.01147896305411664, 0.01202306344873907, 0.01259176862638817, 0.01318610088984998, 0.01380711805038463, 0.01445591410249352, 0.01513361986068324, 0.01584140355220492, 0.0165804713592648, 0.01735206790369892, 0.0181574766665683, 0.01899802033457747, 0.01987506106463092, 0.02079000065723563, 0.02174428062881939, 0.02273938217237539, 0.02377682599515798, 0.02485817202144591, 
0.02598501894765835, 0.02715900363636065, 0.02838180033492541, 0.02965511970383264, 0.03098070763879578, 0.03236034387009423, 0.03379584032168653, 0.03528903921186555, 0.036841810876417, 0.03845605129445049, 0.04013367929630051, 0.04187663343215397, 0.04368686847935094, 0.04556635156564766, 0.04751705788513009, 0.04954096598293091, 0.05164005258446393, 0.0538162869445349, 0.05607162469146194, 0.05840800114123937, 0.06082732405683333, 0.0633314658279304, 0.06592225504688491, 0.06860146745725915, 0.07137081625224674, 0.07423194170143516, 0.0771864000858435, 0.080235651922972, 0.08338104946577905, 0.08662382346207681, 0.08996506916384245, 0.09340573157943294, 0.09694658996567403, 0.1005882415613348, 0.1043310845686221, 0.1081753003950616, 0.1121208351745449, 0.1161673805934031, 0.1203143540551923, 0.1245608782264583, 0.1289057600150967, 0.1333474690431253, 0.1378841156866489, 0.1425134287676627, 0.1472327329950105, 0.1520389262653106, 0.1569284569490234, 0.161897301301918, 0.1669409411580814, 0.1720543420771582, 0.177231932135636, 0.1824675815696896, 0.1877545834950995, 0.1930856359480699, 0.1984528255091245, 0.2038476127904451, 0.209260820084915, 0.2146826214923025, 0.2201025358543364, 0.2255094228454409, 0.2308914825792523, 0.2362362591024032, 0.2415306481558439, 0.2467609095898317, 0.2519126848210489, 0.2569710197185453, 0.2619203932988624, 0.2667447525990149, 0.2714275540785176, 0.2759518118776055, 0.2803001532275887, 0.2844548812703608, 0.2883980454967083, 0.2921115199568405, 0.2955770893308786, 0.2987765428715182, 0.3016917761454791, 0.3043049004043671, 0.3065983593093314, 0.3085550526174718, 0.3101584663117584, 0.3113928085209558, 0.3122431504325479, 0.3126955712512451, 0.3127373060998798, 0.3123568956003059, 0.3115443357116975, 0.3102912262450939, 0.3085909163193908, 0.3064386448787349, 0.3038316742584871, 0.3007694146708087, 0.2972535373862094, 0.2932880743188479, 0.2888795016860186, 0.2840368054110847, 0.2787715259788889, 0.2730977805379817, 
0.2670322601786917, 0.260594200503305, 0.2538053238464894, 0.2466897518011782, 0.2392738870565047, 0.2315862639568581, 0.223657367639345, 0.2155194220927999, 0.2072061479937902, 0.1987524916997563, 0.190194327298764, 0.1815681341085937, 0.1729106524610185, 0.1642585209731216, 0.1556478987675153, 0.1471140762266038, 0.1386910778220684, 0.1304112603201856, 0.1223049092002711, 0.1143998354173268, 0.1067209736787494, 0.09928998218860882, 0.09212484235648469, 0.08523945530500515, 0.07864323019818778, 0.07234065753515409, 0.066330858725245, 0.06060710162903567, 0.05515627049877064, 0.04996420075101188, 0.04503493592221131, 0.04037628830565626, 0.0359943390976098, 0.03189337031630107, 0.02807581289404204, 0.02454221260818712, 0.02129121538890238, 0.01831957337426543, 0.01562217287497714, 0.01319208516138803, 0.01102064069523701, 0.009097527098942793, 0.007410910788998581, 0.005947581800879786, 0.004693120905533976, 0.003632087668113404, 0.002748227634946976, 0.002024696362768419, 0.001444297531536132, 0.0009897307852349546, 0.0006438444749196914, 0.0003898965165846091, 0.0002118186115419862, 9.446471719082306e-05, 2.385271278418366e-05, -1.261182739609179e-05, -2.593392874142286e-05, -2.535556328270294e-05, -1.821596352841182e-05, -9.851983898107042e-06, -3.5447091586982e-06, -5.186480378782665e-07, -1.387778780781446e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 3 + }, + { + "i": 3, + "radial_function": [ + 1.858786480530317e-08, 1.90584188308621e-08, 1.954088498766175e-08, 2.003556483270786e-08, 2.054276755695658e-08, 2.106281017856854e-08, 2.159601774105547e-08, 2.214272351644294e-08, 2.270326921357587e-08, 2.327800519169782e-08, 2.386729067943646e-08, 2.447149399933315e-08, 2.50909927980563e-08, 2.572617428244263e-08, 2.637743546151399e-08, 2.704518339462047e-08, 2.772983544586569e-08, 2.843181954497272e-08, 2.915157445475369e-08, 2.98895500453509e-08, 3.064620757541968e-08, 3.142201998043002e-08, 3.22174721682662e-08, 3.303306132230975e-08, 3.386929721219509e-08, 3.472670251243142e-08, 3.560581312909129e-08, 3.650717853476885e-08, 3.743136211201753e-08, 3.837894150548256e-08, 3.93505089829466e-08, 4.034667180551636e-08, 4.136805260717992e-08, 4.241528978397268e-08, 4.348903789299544e-08, 4.458996806153287e-08, 4.571876840652962e-08, 
4.687614446468506e-08, 4.80628196334361e-08, 4.927953562310359e-08, 5.05270529204843e-08, 5.180615126417952e-08, 5.311763013195608e-08, 5.446230924044518e-08, 5.584102905749135e-08, 5.725465132747073e-08, 5.870405960990884e-08, 6.019015983173284e-08, 6.171388085350421e-08, 6.327617504998609e-08, 6.487801890540676e-08, 6.652041362379307e-08, 6.820438575475443e-08, 6.99309878351078e-08, 7.17012990467469e-08, 7.351642589116352e-08, 7.537750288104595e-08, 7.728569324938445e-08, 7.924218967652791e-08, 8.124821503564651e-08, 8.330502315706447e-08, 8.541389961194334e-08, 8.757616251580347e-08, 8.979316335238672e-08, 9.206628781837596e-08, 9.439695668949719e-08, 9.678662670854862e-08, 9.923679149590932e-08, 1.017489824830974e-07, 1.043247698699625e-07, 1.06965763606107e-07, 1.096736143971549e-07, 1.12450014736492e-07, 1.15296699963126e-07, 1.182154493463267e-07, 1.212080871977215e-07, 1.242764840115461e-07, 1.274225576337598e-07, 1.306482744607561e-07, 1.339556506684205e-07, 1.373467534722997e-07, 1.408237024196731e-07, 1.443886707143336e-07, 1.480438865749035e-07, 1.517916346275388e-07, 1.556342573338861e-07, 1.595741564551911e-07, 1.636137945534702e-07, 1.677556965306835e-07, 1.720024512068728e-07, 1.763567129382491e-07, 1.808212032762424e-07, 1.853987126685507e-07, 1.900921022032496e-07, 1.94904305397056e-07, 1.998383300288587e-07, 2.048972600196674e-07, 2.100842573601504e-07, 2.154025640869687e-07, 2.208555043091418e-07, 2.264464862857079e-07, 2.321790045559822e-07, 2.380566421237413e-07, 2.440830726967005e-07, 2.502620629826846e-07, 2.56597475043921e-07, 2.630932687109373e-07, 2.697535040575622e-07, 2.765823439385828e-07, 2.835840565916425e-07, 2.907630183050025e-07, 2.981237161528403e-07, 3.056707507997909e-07, 3.134088393764832e-07, 3.21342818427873e-07, 3.294776469362076e-07, 3.3781840942052e-07, 3.463703191145854e-07, 3.55138721225325e-07, 3.641290962736991e-07, 3.733470635201683e-07, 3.827983844768749e-07, 3.924889665087314e-07, 4.024248665256694e-07, 
4.126122947683598e-07, 4.230576186897604e-07, 4.337673669349313e-07, 4.447482334215927e-07, 4.560070815239829e-07, 4.675509483626317e-07, 4.793870492027192e-07, 4.915227819637865e-07, 5.039657318436032e-07, 5.167236760590873e-07, 5.298045887072421e-07, 5.432166457491376e-07, 5.569682301200678e-07, 5.710679369690656e-07, 5.855245790310519e-07, 6.003471921349837e-07, 6.155450408514283e-07, 6.311276242831125e-07, 6.471046820020514e-07, 6.634862001369722e-07, 6.802824176148416e-07, 6.975038325603874e-07, 7.151612088576239e-07, 7.332655828774781e-07, 7.518282703757199e-07, 7.708608735655136e-07, 7.903752883689938e-07, 8.103837118524246e-07, 8.308986498495643e-07, 8.519329247780125e-07, 8.734996836534283e-07, 8.956124063066063e-07, 9.182849138085767e-07, 9.415313771089725e-07, 9.653663258930686e-07, 9.898046576630359e-07, 1.014861647049064e-06, 1.040552955356205e-06, 1.066894640352867e-06, 1.093903166307111e-06, 1.121595414277e-06, 1.149988692661416e-06, 1.17910074801801e-06, 1.208949776154935e-06, 1.239554433503396e-06, 1.270933848778078e-06, 1.303107634932709e-06, 1.336095901418316e-06, 1.369919266751738e-06, 1.404598871402312e-06, 1.440156391004785e-06, 1.476614049906636e-06, 1.513994635058413e-06, 1.5523215102556e-06, 1.591618630741044e-06, 1.631910558177023e-06, 1.673222475996253e-06, 1.715580205141566e-06, 1.759010220203924e-06, 1.803539665968996e-06, 1.849196374382574e-06, 1.896008881945393e-06, 1.944006447548341e-06, 1.993219070759071e-06, 2.043677510571514e-06, 2.095413304630048e-06, 2.148458788940183e-06, 2.202847118078309e-06, 2.258612285912884e-06, 2.31578914685022e-06, 2.374413437618024e-06, 2.434521799600307e-06, 2.496151801737723e-06, 2.559341964007485e-06, 2.624131781497661e-06, 2.690561749090867e-06, 2.758673386772673e-06, 2.828509265580751e-06, 2.90011303421071e-06, 2.973529446295482e-06, 3.048804388375177e-06, 3.125984908574839e-06, 3.205119246008218e-06, 3.286256860925668e-06, 3.369448465625218e-06, 3.454746056146058e-06, 3.542202944764119e-06, 
3.631873793310341e-06, 3.723814647332095e-06, 3.818082971119375e-06, 3.914737683617518e-06, 4.013839195248839e-06, 4.115449445666406e-06, 4.219631942463205e-06, 4.326451800861199e-06, 4.435975784404889e-06, 4.548272346684778e-06, 4.66341167411703e-06, 4.781465729805701e-06, 4.902508298515244e-06, 5.026615032781264e-06, 5.153863500188185e-06, 5.284333231843737e-06, 5.418105772080073e-06, 5.555264729412901e-06, 5.695895828790395e-06, 5.840086965164294e-06, 5.987928258417101e-06, 6.139512109679129e-06, 6.29493325907104e-06, 6.454288844907753e-06, 6.617678464400501e-06, 6.785204235895455e-06, 6.956970862687172e-06, 7.133085698447188e-06, 7.313658814308454e-06, 7.498803067647293e-06, 7.688634172606378e-06, 7.883270772402075e-06, 8.082834513461858e-06, 8.28745012143785e-06, 8.49724547914389e-06, 8.712351706465018e-06, 8.932903242289e-06, 9.15903792851126e-06, 9.390897096165587e-06, 9.628625653734306e-06, 9.872372177693235e-06, 1.012228900534773e-05, 1.037853233001799e-05, 1.064126229863283e-05, 1.091064311179303e-05, 1.118684312636657e-05, 1.147003496067986e-05, 1.176039560237067e-05, 1.205810651896991e-05, 1.236335377128146e-05, 1.267632812963068e-05, 1.29972251930542e-05, 1.332624551150538e-05, 1.366359471115163e-05, 1.400948362284187e-05, 1.436412841382417e-05, 1.472775072279588e-05, 1.510057779837039e-05, 1.548284264104702e-05, 1.587478414877242e-05, 1.627664726618461e-05, 1.668868313763232e-05, 1.711114926406543e-05, 1.754430966389429e-05, 1.798843503791801e-05, 1.844380293842505e-05, 1.891069794257087e-05, 1.938941183014151e-05, 1.98802437658135e-05, 2.038350048602361e-05, 2.089949649056556e-05, 2.142855423903226e-05, 2.19710043522269e-05, 2.252718581866794e-05, 2.309744620631661e-05, 2.36821418796595e-05, 2.428163822228063e-05, 2.489630986506261e-05, 2.552654092015847e-05, 2.617272522087998e-05, 2.683526656765245e-05, 2.751457898018839e-05, 2.821108695603779e-05, 2.892522573567603e-05, 2.965744157429374e-05, 3.040819202045905e-05, 3.117794620182472e-05, 
3.196718511805863e-05, 3.277640194117982e-05, 3.360610232348691e-05, 3.44568047132709e-05, 3.532904067850831e-05, 3.622335523873683e-05, 3.714030720531928e-05, 3.808046953030777e-05, 3.904442966412566e-05, 4.00327899222887e-05, 4.104616786139452e-05, 4.208519666461367e-05, 4.315052553692191e-05, 4.424282011031981e-05, 4.536276285929116e-05, 4.65110535267586e-05, 4.768840956080141e-05, 4.889556656240596e-05, 5.013327874452844e-05, 5.140231940275321e-05, 5.270348139784075e-05, 5.403757765046383e-05, 5.54054416484392e-05, 5.680792796677025e-05, 5.824591280082239e-05, 5.972029451296296e-05, 6.123199419300423e-05, 6.278195623279775e-05, 6.43711489153349e-05, 6.600056501872116e-05, 6.767122243539665e-05, 6.938416480698732e-05, 7.114046217518121e-05, 7.294121164903032e-05, 7.478753808909494e-05, 7.668059480885113e-05, 7.862156429379716e-05, 8.061165893870405e-05, 8.265212180346458e-05, 8.474422738801123e-05, 8.688928242677985e-05, 8.90886267032116e-05, 9.134363388479655e-05, 9.365571237917339e-05, 9.602630621181703e-05, 9.845689592585326e-05, 0.0001009489995045579, 0.0001035041733171094, 0.0001061240130881766, 0.0001088101548919433, 0.0001115642761711794, 0.0001143880967819875, 0.0001172833800648708, 0.0001202519339427776, 0.000123295612046803, 0.0001264163148702387, 0.0001296159909516794, 0.0001328966380879172, 0.0001362603045773646, 0.0001397090904947749, 0.0001432451489980395, 0.0001468706876678661, 0.0001505879698811606, 0.0001543993162189503, 0.0001583071059097188, 0.0001623137783090309, 0.0001664218344163566, 0.0001706338384300226, 0.0001749524193412399, 0.0001793802725681858, 0.0001839201616311366, 0.0001885749198696739, 0.0001933474522030155, 0.0001982407369345384, 0.0002032578276016017, 0.00020840185487179, 0.0002136760284867358, 0.0002190836392547017, 0.0002246280610931321, 0.000230312753122419, 0.0002361412618121515, 0.0002421172231811475, 0.0002482443650526094, 0.000254526509365758, 0.0002609675745453539, 0.0002675715779305353, 0.0002743426382644364, 
0.0002812849782460942, 0.0002884029271461818, 0.0002957009234881358, 0.0003031835177963085, 0.0003108553754127787, 0.0003187212793845257, 0.000326786133422696, 0.0003350549649357315, 0.0003435329281381833, 0.0003522253072370644, 0.0003611375196976477, 0.0003702751195906591, 0.0003796438010228538, 0.0003892494016530274, 0.0003990979062955452, 0.0004091954506135304, 0.0004195483249039031, 0.0004301629779765055, 0.0004410460211296134, 0.0004522042322241746, 0.0004636445598591807, 0.0004753741276506278, 0.0004874002386165717, 0.0004997303796708678, 0.0005123722262282098, 0.0005253336469231671, 0.0005386227084459774, 0.0005522476804978976, 0.0005662170408690125, 0.0005805394806414319, 0.0005952239095208957, 0.0006102794612998714, 0.0006257154994552786, 0.0006415416228840816, 0.0006577676717800305, 0.0006744037336549134, 0.0006914601495077748, 0.0007089475201455962, 0.0007268767126590492, 0.0007452588670569868, 0.0007641054030634211, 0.0007834280270808324, 0.0008032387393237025, 0.0008235498411263049, 0.0008443739424288052, 0.0008657239694458656, 0.0008876131725220094, 0.000910055134178075, 0.0009330637773532362, 0.0009566533738470874, 0.0009808385529664336, 0.001005634310381518, 0.001031056017196461, 0.001057119429238881, 0.001083840696573655, 0.001111236373245966, 0.001139323427258833, 0.00116811925079042, 0.001197641670656577, 0.001227908959024097, 0.001258939844380313, 0.001290753522764805, 0.001323369669268967, 0.001356808449809469, 0.00139109053318159, 0.001426237103398603, 0.001462269872323493, 0.001499211092599322, 0.001537083570884791, 0.001575910681401531, 0.001615716379799844, 0.00165652521734971, 0.001698362355463915, 0.001741253580560404, 0.001785225319270914, 0.001830304654003142, 0.001876519338863803, 0.001923897815949957, 0.001972469232016225, 0.002022263455525465, 0.002073311094090656, 0.002125643512315872, 0.00217929285004416, 0.002234292041020457, 0.002290674831977535, 0.002348475802153216, 0.002407730383247096, 0.002468474879825045, 
0.002530746490179981, 0.002594583327657278, 0.002660024442453348, 0.002727109843895988, 0.002795880523214997, 0.002866378476811807, 0.002938646730036691, 0.003012729361482255, 0.003088671527801909, 0.003166519489061904, 0.003246320634635692, 0.003328123509649143, 0.003411977841985221, 0.003497934569856684, 0.003586045869955117, 0.003676365186184814, 0.003768947258989637, 0.003863848155280982, 0.0039611252989749, 0.004060837502145989, 0.004163044996805873, 0.004267809467313478, 0.004375194083424325, 0.004485263533985702, 0.004598084061284172, 0.004713723496051808, 0.00483225129313683, 0.004953738567844209, 0.005078258132951182, 0.005205884536402138, 0.00533669409968697, 0.005470764956906197, 0.005608177094525633, 0.005749012391822793, 0.005893354662026199, 0.006041289694148389, 0.006192905295512158, 0.006348291334968894, 0.006507539786806816, 0.006670744775345739, 0.00683800262021413, 0.007009411882302632, 0.00718507341038718, 0.00736509038841342, 0.007549568383432298, 0.007738615394175653, 0.007932341900258376, 0.008130860911992187, 0.008334288020794025, 0.00854274145016983, 0.008756342107252625, 0.008975213634870992, 0.009199482464121798, 0.009429277867418364, 0.009664732011982051, 0.00990598001374281, 0.01015315999161042, 0.01040641312207504, 0.0106658836940922, 0.01093171916420278, 0.01120407021183566, 0.01148309079473512, 0.0117689382044513, 0.01206177312182696, 0.01236175967240832, 0.01266906548170284, 0.01298386173020064, 0.01330632320807015, 0.0136366283694326, 0.01397495938611212, 0.01432150220075236, 0.01467644657918178, 0.01503998616190216, 0.01541231851456657, 0.01579364517730347, 0.01618417171273487, 0.01658410775252589, 0.01699366704229279, 0.0174130674846855, 0.01784253118044846, 0.01828228446725218, 0.01873255795607386, 0.01919358656489249, 0.01966560954944907, 0.02014887053080741, 0.02064361751943522, 0.02115010293550832, 0.02166858362512286, 0.02219932087208256, 0.02274258040490739, 0.02329863239869109, 0.02386775147141214, 0.02445021667428067, 
0.02504631147568084, 0.02565632373824166, 0.02628054568854556, 0.02691927387895446, 0.02757280914100619, 0.0282414565298035, 0.02892552525878636, 0.02962532862424661, 0.03034118391890862, 0.03107341233386474, 0.0318223388481169, 0.03258829210493609, 0.0333716042742125, 0.03417261089992518, 0.03499165073181701, 0.03582906554031515, 0.0366851999136883, 0.03756040103638496, 0.03845501844744315, 0.03936940377781061, 0.04030391046535892, 0.04125889344631722, 0.04223470882179432, 0.04323171349799519, 0.04425026479867645, 0.04529072004832115, 0.04635343612444551, 0.04743876897738518, 0.0485470731158363, 0.04967870105635712, 0.05083400273496435, 0.05201332487888219, 0.05321701033643129, 0.05444539736296593, 0.05569881886069408, 0.05697760157013831, 0.05828206521091798, 0.05961252156946047, 0.06096927353117116, 0.06235261405452073, 0.06376282508443588, 0.06520017640230973, 0.06666492440988493, 0.06815731084419753, 0.06967756142071474, 0.07122588440174825, 0.07280246908717815, 0.07440748422449041, 0.07604107633509868, 0.07770336795390863, 0.07939445577907664, 0.08111440872892356, 0.08286326590299344, 0.0846410344442847, 0.08644768729974936, 0.08828316087623833, 0.09014735258918193, 0.09204011830143427, 0.09396126964987814, 0.09591057125758974, 0.09788773782960646, 0.0998924311306179, 0.1019242568432356, 0.1039827613058691, 0.10606742812967, 0.1081776746945009, 0.1103128485244374, 0.112472223543945, 0.1146549962165713, 0.1168602815687829, 0.1190871091024524, 0.1213344186004658, 0.1236010558310049, 0.1258857681572318, 0.1281872000604116, 0.130503888585935, 0.132834258723255, 0.1351766187324689, 0.1375291554321053, 0.1398899294646995, 0.1422568705589044, 0.1446277728092211, 0.1470002899969666, 0.1493719309787838, 0.1517400551719086, 0.1541018681684911, 0.1564544175145496, 0.1587945886926347, 0.1611191013509494, 0.1634245058255614, 0.1657071800064182, 0.1679633266021183, 0.1701889708628394, 0.1723799588253902, 0.1745319561490903, 0.1766404476160143, 0.1787007373740454, 
0.1807079500061642, 0.1826570325143415, 0.1845427573113318, 0.1863597263184571, 0.1881023762720825, 0.1897649853458451, 0.1913416811996763, 0.1928264505701972, 0.1942131505200002, 0.1954955214655582, 0.1966672021048611, 0.1977217463661977, 0.1986526424986119, 0.1994533344222512, 0.2001172454528745, 0.2006378045089706, 0.2010084749019853, 0.2012227857998016, 0.2012743664405687, 0.2011569831579227, 0.2008645792592633, 0.2003913177757139, 0.1997316270753516, 0.1988802492998975, 0.1978322915489728, 0.1965832796948833, 0.1951292146644002, 0.1934666309718225, 0.1915926572294773, 0.1895050782975195, 0.1872023986642339, 0.1846839065709587, 0.1819497383122105, 0.1790009420517116, 0.1758395403990586, 0.1724685908900835, 0.1688922434071842, 0.1651157934647747, 0.1611457301705899, 0.1569897775572034, 0.1526569278613047, 0.1481574652130972, 0.1435029780868217, 0.1387063587586509, 0.1337817879231943, 0.1287447025380967, 0.1236117449019058, 0.1184006909279165, 0.1131303555612118, 0.1078204733030816, 0.1024915518623029, 0.09716469705291085, 0.09186140720964314, 0.08660333560228278, 0.08141201960578981, 0.07630857573143612, 0.07131336005227337, 0.06644559407058716, 0.06172295668155635, 0.05716114359112094, 0.05277339635067232, 0.04857000407874744, 0.04455778195017631, 0.04073953164347359, 0.03711349014181164, 0.03367277457213003, 0.0304085476098426, 0.02732242627431508, 0.0244181745790783, 0.02169837466719757, 0.01916439833007885, 0.01681639063572121, 0.01465326658927257, 0.01267272161816663, 0.01087125651539442, 0.009244217287585371, 0.007785850140017314, 0.006489371589845611, 0.005347053433807869, 0.004350322010319013, 0.003489870892152774, 0.002755785829319235, 0.002137680437954786, 0.001624840806206285, 0.001206376869266612, 0.0008713780993411108, 0.0006090702682608057, 0.0004089698273941345, 0.0002610359645368598, 0.0001558166222948854, 8.457869176410615e-05, 3.942511381291314e-05, 1.339275712937404e-05, 5.282021629943179e-07, -4.062026976982269e-06, -4.189934642025162e-06, 
-2.596102234304898e-06, -9.938214633256615e-07, -1.493753881628979e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 3 + }, + { + "i": 3, + "radial_function": [ + 1.884672590647477e-15, 1.981301817329608e-15, 2.08288533014477e-15, 2.189677140754437e-15, 2.301944284274895e-15, 
2.419967487003885e-15, 2.544041868382382e-15, 2.674477678946687e-15, 2.811601076116091e-15, 2.955754939756094e-15, 3.107299729556322e-15, 3.266614386367212e-15, 3.434097279749179e-15, 3.610167204103647e-15, 3.795264425876848e-15, 3.989851784454753e-15, 4.194415849502218e-15, 4.409468137640094e-15, 4.63554639150264e-15, 4.873215924373725e-15, 5.123071033763779e-15, 5.385736487462514e-15, 5.661869085783086e-15, 5.952159303904222e-15, 6.257333018417039e-15, 6.5781533223936e-15, 6.915422433516206e-15, 7.269983700038378e-15, 7.6427237095936e-15, 8.034574506125139e-15, 8.44651592047988e-15, 8.879578020494549e-15, 9.334843686700252e-15, 9.813451320086218e-15, 1.03165976886936e-14, 1.084554092015682e-14, 1.140160364767612e-14, 1.198617631728716e-14, 1.260072066469804e-14, 1.324677337038748e-14, 1.392594990210344e-14, 1.463994855437116e-14, 1.539055469511077e-14, 1.617964522998354e-14, 1.700919329563024e-14, 1.788127319353596e-14, 1.879806557686035e-14, 1.976186290320153e-14, 2.077507516692911e-14, 2.18402359254202e-14, 2.296000863426634e-14, 2.413719330729372e-14, 2.537473351804933e-14, 2.667572376026063e-14, 2.804341718567422e-14, 2.948123373862062e-14, 3.099276870764754e-14, 3.258180171560415e-14, 3.425230617065592e-14, 3.600845920186382e-14, 3.78546521041695e-14, 3.979550131890721e-14, 4.18358599772971e-14, 4.398083003578543e-14, 4.623577503357721e-14, 4.860633350425884e-14, 5.109843307505032e-14, 5.371830528893974e-14, 5.647250118676364e-14, 5.936790768819797e-14, 6.241176481261725e-14, 6.561168378288628e-14, 6.897566605735141e-14, 7.251212333762073e-14, 7.622989860216564e-14, 8.013828821833281e-14, 8.424706518806512e-14, 8.8566503585452e-14, 9.310740424721849e-14, 9.788112178039209e-14, 1.028995929546779e-13, 1.081753665505409e-13, 1.137216347376298e-13, 1.195522660620014e-13, 1.256818401246369e-13, 1.32125684037955e-13, 1.388999107514907e-13, 1.460214593425683e-13, 1.535081373727154e-13, 1.613786654157353e-13, 1.696527238687722e-13, 1.783510021634302e-13, 
1.874952504999912e-13, 1.971083342340976e-13, 2.072142910518947e-13, 2.178383910765916e-13, 2.290072000567504e-13, 2.407486457942947e-13, 2.530920879783468e-13, 2.660683915995153e-13, 2.797100041281942e-13, 2.940510366498754e-13, 3.091273491603384e-13, 3.249766402340073e-13, 3.416385412896905e-13, 3.591547156894007e-13, 3.77568962918077e-13, 3.969273281046871e-13, 4.172782171585806e-13, 4.386725178089901e-13, 4.61163726850326e-13, 4.848080839114649e-13, 5.096647120835006e-13, 5.357957657576067e-13, 5.632665860426834e-13, 5.921458641513876e-13, 6.225058131631239e-13, 6.544223485934627e-13, 6.879752782215113e-13, 7.232485016499016e-13, 7.603302200963703e-13, 7.993131569415518e-13, 8.40294789584423e-13, 8.833775931851806e-13, 9.28669296905017e-13, 9.762831532834984e-13, 1.026338221427165e-12, 1.078959664717417e-12, 1.134279063782134e-12, 1.192434745513598e-12, 1.253572128955391e-12, 1.317844088923218e-12, 1.385411338268818e-12, 1.456442829742854e-12, 1.531116178461611e-12, 1.60961810603384e-12, 1.692144907458346e-12, 1.778902941959737e-12, 1.870109148989656e-12, 1.965991590683798e-12, 2.066790022130987e-12, 2.172756490880429e-12, 2.284155967186042e-12, 2.401267006563837e-12, 2.52438244631907e-12, 2.653810137784729e-12, 2.789873716102385e-12, 2.932913409470161e-12, 3.08328688988132e-12, 3.241370167480761e-12, 3.407558530775583e-12, 3.582267535050879e-12, 3.76593404146207e-12, 3.959017309402085e-12, 4.162000144874805e-12, 4.375390107746064e-12, 4.59972078089113e-12, 4.835553104411917e-12, 5.083476778260084e-12, 5.344111736773356e-12, 5.618109698811735e-12, 5.90615579737004e-12, 6.208970292741161e-12, 6.527310373513877e-12, 6.861972049908566e-12, 7.2137921441845e-12, 7.58365038309654e-12, 7.97247159763207e-12, 8.381228035529442e-12, 8.810941792359805e-12, 9.262687367250638e-12, 9.737594349642473e-12, 1.023685024379543e-11, 1.076170343810904e-11, 1.131346632667961e-11, 1.189351859089951e-11, 1.250331064930548e-11, 1.314436728429973e-11, 1.381829145481377e-11, 
1.452676830444777e-11, 1.527156937510616e-11, 1.605455703666766e-11, 1.687768914376253e-11, 1.774302393130345e-11, 1.865272516100947e-11, 1.960906753179036e-11, 2.061444236752208e-11, 2.167136359643148e-11, 2.278247403704334e-11, 2.395055200640637e-11, 2.517851826711922e-11, 2.646944333053028e-11, 2.782655513436713e-11, 2.925324711399639e-11, 3.075308668749263e-11, 3.232982417573131e-11, 3.398740217981212e-11, 3.572996543925453e-11, 3.756187119561823e-11, 3.948770008745938e-11, 4.151226760386101e-11, 4.364063612518059e-11, 4.587812758111269e-11, 4.823033675772223e-11, 5.070314528671628e-11, 5.330273635192973e-11, 5.60356101498018e-11, 5.890860014248956e-11, 6.192889014426272e-11, 6.510403228389759e-11, 6.844196588797679e-11, 7.195103733231644e-11, 7.564002091114324e-11, 7.951814077620673e-11, 8.359509400067769e-11, 8.78810748254916e-11, 9.238680014876974e-11, 9.712353632203144e-11, 1.021031273202054e-10, 1.073380243558647e-10, 1.128413170117217e-10, 1.186267659692324e-10, 1.247088374151178e-10, 1.311027392118408e-10, 1.378244589224596e-10, 1.448908037849218e-10, 1.523194427357545e-10, 1.601289505881921e-10, 1.683388544752076e-10, 1.769696826735555e-10, 1.860430159308759e-10, 1.955815414242078e-10, 2.056091094847774e-10, 2.161507932308999e-10, 2.272329512580715e-10, 2.38883293542961e-10, 2.511309507260955e-10, 2.640065469464023e-10, 2.775422764097253e-10, 2.917719838827202e-10, 3.067312493133437e-10, 3.2245747678952e-10, 3.38989988058322e-10, 3.563701208394967e-10, 3.746413321790882e-10, 3.938493071015224e-10, 4.140420728317824e-10, 4.352701188731913e-10, 4.575865232409894e-10, 4.810470851672543e-10, 5.057104646088919e-10, 5.31638328907447e-10, 5.588955069673256e-10, 5.875501513378438e-10, 6.176739086042512e-10, 6.493420985136493e-10, 6.826339022835633e-10, 7.176325605638504e-10, 7.544255815467961e-10, 7.931049597455679e-10, 8.337674059878677e-10, 8.765145891996927e-10, 9.214533905834922e-10, 9.6869617082609e-10, 1.018361051004205e-09, 1.070572207889678e-09, 
1.125460184392517e-09, 1.183162215917631e-09, 1.243822573450963e-09, 1.307592924232492e-09, 1.374632710917496e-09, 1.445109550173765e-09, 1.519199651710845e-09, 1.597088258788637e-09, 1.678970111306202e-09, 1.765049932628059e-09, 1.855542941364677e-09, 1.950675389386004e-09, 2.050685127412634e-09, 2.155822199597975e-09, 2.266349468587172e-09, 2.382543272614851e-09, 2.50469411628355e-09, 2.633107396749044e-09, 2.768104167127104e-09, 2.910021939029151e-09, 3.059215526232236e-09, 3.216057931591141e-09, 3.380941279408831e-09, 3.554277795594677e-09, 3.736500838059331e-09, 3.928065979920756e-09, 4.129452148227469e-09, 4.341162821044021e-09, 4.563727284329144e-09, 4.797701962016321e-09, 5.043671793391158e-09, 5.302251708781742e-09, 5.574088157425422e-09, 5.859860726786702e-09, 6.160283843159328e-09, 6.476108550081372e-09, 6.808124394851239e-09, 7.157161392020442e-09, 7.524092110186598e-09, 7.909833835662674e-09, 8.315350882137901e-09, 8.741656986885705e-09, 9.189817856353535e-09, 9.660953818946999e-09, 1.015624262593166e-08, 1.067692240916349e-08, 1.122429474864447e-08, 1.179972795211711e-08, 1.240466044877651e-08, 1.304060441449149e-08, 1.37091495178297e-08, 1.441196691392271e-08, 1.515081340642119e-08, 1.59275358481862e-08, 1.67440757622922e-08, 1.760247415954576e-08, 1.85048766757039e-08, 1.945353888992125e-08, 2.045083205706669e-08, 2.149924885068533e-08, 2.260140982400387e-08, 2.376006975836077e-08, 2.497812459035911e-08, 2.625861875633121e-08, 2.760475266262271e-08, 2.901989078206289e-08, 3.050756995413847e-08, 3.207150829969844e-08, 3.371561447296357e-08, 3.544399745571826e-08, 3.726097678676892e-08, 3.917109336449379e-08, 4.117912078216592e-08, 4.329007720528769e-08, 4.550923807033718e-08, 4.784214900563386e-08, 5.029463986837439e-08, 5.28728392701144e-08, 5.558318977889714e-08, 5.84324642023564e-08, 6.142778221773326e-08, 6.457662843888648e-08, 6.788687088141794e-08, 7.136678075193593e-08, 7.502505303452928e-08, 7.887082810313574e-08, 8.291371473908594e-08, 
8.716381398151084e-08, 9.163174433104458e-08, 9.632866826763856e-08, 1.012663201917808e-07, 1.064570355363379e-07, 1.119137818494136e-07, 1.176501906668987e-07, 1.236805921617606e-07, 1.30020050129168e-07, 1.366844003927712e-07, 1.436902896336829e-07, 1.510552173738011e-07, 1.58797579204173e-07, 1.669367132676238e-07, 1.754929477893937e-07, 1.844876523167056e-07, 1.939432906407392e-07, 2.038834772456091e-07, 2.143330356966572e-07, 2.253180607629259e-07, 2.36865983691624e-07, 2.490056401320559e-07, 2.617673423435114e-07, 2.751829547295673e-07, 2.892859730255305e-07, 3.041116082091383e-07, 3.196968735973959e-07, 3.360806776614771e-07, 3.533039208491609e-07, 3.714095970739676e-07, 3.90442901324903e-07, 4.104513420110321e-07, 4.314848591362937e-07, 4.535959489547167e-07, 4.768397945548556e-07, 5.012744031744901e-07, 5.269607505576053e-07, 5.539629331219622e-07, 5.823483269339555e-07, 6.121877558329872e-07, 6.435556673550784e-07, 6.76530318197389e-07, 7.11193968625535e-07, 7.476330874146019e-07, 7.859385664761036e-07, 8.262059473806378e-07, 8.685356583388603e-07, 9.130332643006461e-07, 9.598097292117287e-07, 1.00898169167521e-06, 1.060671754744066e-06, 1.115008790907506e-06, 1.172128261637576e-06, 1.23217255419433e-06, 1.2952913350905e-06, 1.361641921627413e-06, 1.431389672481924e-06, 1.504708397937826e-06, 1.581780791376516e-06, 1.662798882242544e-06, 1.747964512500223e-06, 1.83748983726308e-06, 1.931597850450059e-06, 2.030522937802002e-06, 2.134511457702443e-06, 2.243822351419735e-06, 2.358727784554673e-06, 2.479513821255705e-06, 2.606481131967082e-06, 2.739945738277767e-06, 2.880239794423958e-06, 3.027712409236225e-06, 3.182730509493007e-06, 3.345679747151164e-06, 3.516965452593871e-06, 3.697013636365398e-06, 3.886272041420325e-06, 4.085211248743509e-06, 4.294325839397985e-06, 4.514135614960187e-06, 4.745186880014382e-06, 4.98805378933637e-06, 5.243339763759642e-06, 5.511678977039172e-06, 5.793737918369515e-06, 6.090217033565457e-06, 6.401852449723051e-06, 
6.72941778635497e-06, 7.073726058232116e-06, 7.435631674726088e-06, 7.816032539254034e-06, 8.215872255204775e-06, 8.636142442939529e-06, 9.07788517333264e-06, 9.5421955243182e-06, 1.003022426585313e-05, 1.054318068012206e-05, 1.10823355233605e-05, 1.164902413702554e-05, 1.224464971520561e-05, 1.287068673567861e-05, 1.352868456390612e-05, 1.422027123726346e-05, 1.494715743940145e-05, 1.571114067344491e-05, 1.651410964482773e-05, 1.735804886247502e-05, 1.824504347129035e-05, 1.917728432538399e-05, 2.015707331501575e-05, 2.118682895972582e-05, 2.226909228005186e-05, 2.340653296226116e-05, 2.460195583005974e-05, 2.585830763847762e-05, 2.717868420571743e-05, 2.856633789928508e-05, 3.002468549407006e-05, 3.155731642007375e-05, 3.316800141939733e-05, 3.486070163140482e-05, 3.663957812780529e-05, 3.850900191934646e-05, 4.047356445604761e-05, 4.253808864595038e-05, 4.470764041723165e-05, 4.698754084848115e-05, 4.938337889701431e-05, 5.190102475151149e-05, 5.454664383984061e-05, 5.732671152415027e-05, 6.024802851470365e-05, 6.331773703752391e-05, 6.654333779149106e-05, 6.993270773226278e-05, 7.349411872241127e-05, 7.723625708822903e-05, 8.116824412626326e-05, 8.529965760452926e-05, 8.964055430393374e-05, 9.420149365004516e-05, 9.899356248457163e-05, 0.0001040284010307796, 0.0001093182301075437, 0.0001148758796498306, 0.0001207148185961589, 0.0001268491862052102, 0.0001332938248677504, 0.0001400643144821468, 0.0001471770084634194, 0.0001546490714613936, 0.0001624985188642691, 0.0001707442581680466, 0.0001794061322952812, 0.0001885049649502974, 0.0001980626081016907, 0.0002081019916847198, 0.0002186471756235596, 0.0002297234042726028, 0.0002413571633840403, 0.0002535762397101124, 0.0002664097833541358, 0.0002798883729882554, 0.0002940440840590559, 0.000308910560109085, 0.0003245230873441669, 0.0003409186725824224, 0.0003581361247261166, 0.0003762161398999391, 0.0003952013904075082, 0.0004151366176591371, 0.0004360687292319727, 0.0004580469002263982, 0.0004811226790890679, 
0.0005053500980764855, 0.0005307857885400425, 0.000557489101216037, 0.0005855222317113035, 0.0006149503513783433, 0.0006458417437796161, 0.0006782679469447973, 0.0007123039016299551, 0.00074802810579057, 0.0007855227754859818, 0.0008248740124363729, 0.000866171978455394, 0.0009095110769874057, 0.0009549901419784255, 0.001002712634314252, 0.001052786846059528, 0.00110532611273437, 0.001160449033863396, 0.001218279702034466, 0.001278947940701232, 0.001342589550963508, 0.001409346567553904, 0.001479367524258896, 0.001552807728993011, 0.001629829548741704, 0.001710602704577315, 0.001795304576945093, 0.001884120521403284, 0.001977244194987176, 0.002074877893353919, 0.002177232898842398, 0.002284529839567094, 0.002396999059636946, 0.002514881000566984, 0.002638426593919267, 0.002767897665177854, 0.002903567348825224, 0.003045720514546718, 0.003194654204445336, 0.003350678081098923, 0.003514114886237193, 0.003685300909756461, 0.003864586468724379, 0.004052336395954571, 0.004248930537653742, 0.004454764259558236, 0.004670248960884732, 0.004895812595319675, 0.005131900198162942, 0.005378974418625012, 0.005637516056148983, 0.005908024599494094, 0.006191018767168655, 0.006487037047644905, 0.006796638237618132, 0.007120401976391079, 0.00745892927427271, 0.007812843032672927, 0.008182788553353906, 0.00856943403406686, 0.008973471047550877, 0.009395615000609285, 0.009836605569696864, 0.01029720710915651, 0.01077820902793119, 0.01128042613024749, 0.01180469891542088, 0.01235189383156918, 0.01292290347763946, 0.01351864674775503, 0.01414006891147337, 0.01478814162311353, 0.01546386285286124, 0.01616825673189588, 0.0169023733033014, 0.01766728817002941, 0.01846410203067596, 0.01929394009331248, 0.02015795135708564, 0.02105730775076351, 0.02199320311686605, 0.02296685202947772, 0.02397948843329783, 0.02503236409095315, 0.02612674682507283, 0.02726391854111762, 0.02844517301647062, 0.02967181344083662, 0.03094514969257371, 0.03226649533520075, 0.03363716431799196, 
0.03505846736430571, 0.03653170803109321, 0.038058178422922, 0.0396391545438297, 0.0412758912704142, 0.04296961692978155, 0.04472152746632693, 0.04653278018183278, 0.04840448703405476, 0.05033770747984003, 0.05233344084991762, 0.05439261824382131, 0.05651609393498967, 0.05870463627794891, 0.06095891811164706, 0.06327950665550493, 0.06566685289759179, 0.06812128047755939, 0.0706429740705993, 0.07323196728273935, 0.07588813007231539, 0.0786111557174338, 0.08140054735473197, 0.08425560412075124, 0.08717540693377146, 0.09015880396106454, 0.09320439582416012, 0.09631052060295087, 0.09947523870824918, 0.1026963177017396, 0.1059712171521822, 0.1092970736271041, 0.1126706859301303, 0.1160885007054335, 0.1195465985424837, 0.1230406807263261, 0.1265660567908334, 0.1301176330447462, 0.1336899022526659, 0.1372769346653272, 0.1408723706053734, 0.144469414826176, 0.1480608328718831, 0.1516389496765365, 0.1551956506485072, 0.1587223854934343, 0.1622101750338921, 0.1656496212869294, 0.1690309210609695, 0.172343883330975, 0.1755779506449033, 0.1787222248047871, 0.1817654970519635, 0.184696282967504, 0.187502862275391, 0.190173323707025, 0.1926956150507941, 0.1950575984693772, 0.1972471111198398, 0.1992520310571518, 0.2010603483403851, 0.2026602411923818, 0.2040401569882395, 0.2051888977656515, 0.206095709861338, 0.2067503771830287, 0.2071433175264021, 0.2072656812420452, 0.2071094514500212, 0.2066675448905125, 0.2059339123899682, 0.2049036378152434, 0.2035730342857096, 0.2019397363178104, 0.200002786490928, 0.197762715150801, 0.1952216116104201, 0.192383185271769, 0.1892528150784989, 0.1858375857231552, 0.1821463090762223, 0.1781895293811931, 0.1739795108726804, 0.1695302066253626, 0.1648572076315389, 0.1599776713343905, 0.1549102291117314, 0.1496748725084251, 0.1442928183504719, 0.1387863532337762, 0.1331786582572475, 0.1274936152525558, 0.1217555961382326, 0.1159892373784172, 0.1102192018385141, 0.1044699305813535, 0.09876538731676561, 0.09312879828212196, 0.0875823902688459, 
0.08214712929853032, 0.07684246207282286, 0.07168606175876517, 0.06669357891671572, 0.06187839743100151, 0.05725139417457621, 0.05282069985245284, 0.04859145706558417, 0.04456557017741982, 0.04074144013208629, 0.03711367607260661, 0.03367277457213005, 0.0304085476098426, 0.02732242627431508, 0.0244181745790783, 0.02169837466719757, 0.01916439833007885, 0.01681639063572121, 0.01465326658927257, 0.01267272161816663, 0.01087125651539442, 0.009244217287585371, 0.007785850140017314, 0.006489371589845611, 0.005347053433807869, 0.004350322010319013, 0.003489870892152774, 0.002755785829319235, 0.002137680437954786, 0.001624840806206285, 0.001206376869266612, 0.0008713780993411108, 0.0006090702682608057, 0.0004089698273941345, 0.0002610359645368598, 0.0001558166222948854, 8.457869176410615e-05, 3.942511381291314e-05, 1.339275712937404e-05, 5.282021629943179e-07, -4.062026976982269e-06, -4.189934642025162e-06, -2.596102234304898e-06, -9.938214633256615e-07, -1.493753881628979e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 3 + } + ], + "beta_projectors": [ + { + "cutoff_radius": 1.2, + "radial_function": [ + 0.0003968893376868482, 0.0004018816038282441, 0.0004069366654379861, 0.000412055312404673, 0.0004163653553318908, 0.0004258568385323153, 0.0004257068869632855, 0.0004346201935363642, 0.0004365713328347565, 0.0004467352247078393, 0.0004496475356767088, 0.000456137316798548, 0.0004608331251490794, 0.000465329624152383, 0.0004729157340538151, 0.0004833013065147981, 0.0004895304165771983, 0.0004937926406420601, 0.0004958248406000537, 0.0004961156704937492, 0.0005134224362556769, 0.0005134272009389082, 0.0005236861247386107, 0.0005286393913453494, 0.0005372708767379872, 0.0005391866414558508, 0.000549675415813064, 0.0005573688260889637, 0.0005605398144772553, 0.0005712412045265, 0.0005778123691477233, 0.0005870094103420653, 0.0005925082292590617, 0.0005991488818651303, 0.0006060092217889113, 0.0006118238982912999, 0.0006258423773864707, 0.0006288086156031741, 0.0006376158388491093, 0.0006463978790232083, 0.0006560446719972995, 0.00066011186380803, 0.0006739100205066101, 0.0006754155397119682, 0.0006912652438082756, 0.0006993014181730234, 0.0007019086347779951, 0.000713085161644908, 0.0007169294217700822, 0.0007371399526806127, 0.0007415725784944427, 
0.0007531452529656729, 0.0007581456017612398, 0.0007705470965905931, 0.000780944172580969, 0.0007885067201134632, 0.000798527282932629, 0.0008089467630630759, 0.0008193281964390536, 0.00082752863530626, 0.0008405036792783205, 0.0008532378059638163, 0.0008567475627593718, 0.0008708116663311005, 0.000881628271453383, 0.0008948678616246403, 0.0009045446848237465, 0.0009158891390189244, 0.0009290889297154242, 0.0009404038184935848, 0.0009518503636679887, 0.0009666798865335113, 0.0009755760568430361, 0.000988255393952503, 0.0009994757843045377, 0.001014490166016555, 0.001024760628935778, 0.00103938817139819, 0.001052923011246418, 0.001064412673752083, 0.001078563251434461, 0.001093044668201079, 0.001105789036507376, 0.001120678760498789, 0.001131405629084634, 0.001148727530218496, 0.001161377070939495, 0.001175329749549814, 0.001190642374186507, 0.001210176425505832, 0.001222398838230748, 0.001236497012648829, 0.001255188027012882, 0.001269879630305726, 0.001286224326264823, 0.001300712708980384, 0.001317871627928957, 0.001334948783007162, 0.001353856067212861, 0.001367471058081715, 0.001386459314568073, 0.001403245683324394, 0.001418883113090214, 0.0014386859666565, 0.001458059515844767, 0.001474980454197, 0.00149438639169025, 0.001511950407888624, 0.001531008356627561, 0.001550953559544599, 0.001567950352576508, 0.00158951236101279, 0.001610341803219811, 0.001629257191254757, 0.001649349400727501, 0.001671198335940788, 0.001692640377428972, 0.001713077939329261, 0.001734178719078744, 0.001755520308910435, 0.001779739584172019, 0.001801132035736785, 0.001824421321777431, 0.001847596080275432, 0.001869307650844726, 0.001891652805600358, 0.001917240361724387, 0.001942018916529509, 0.001964810389432636, 0.001990396476417661, 0.002016297980585002, 0.002040711374083071, 0.00206662302631494, 0.002091763092139848, 0.002118487092775179, 0.002144048611623983, 0.002172246831151831, 0.002199973263366017, 0.002227214003894223, 0.002255271079522281, 0.002284917295440753, 
0.002313056054350782, 0.002341290818698105, 0.002370514048984917, 0.002400192439487026, 0.002431445695456003, 0.002461042063507002, 0.002492969278338142, 0.00252407044190095, 0.002556805663562367, 0.00258826961274361, 0.002620893267386515, 0.002653683723747857, 0.002687316796666839, 0.002720559201377648, 0.002754693118137919, 0.002789053794227836, 0.002825397132257046, 0.002859393699020829, 0.002895443685096533, 0.002934146840781915, 0.002969153795885249, 0.003007448523856648, 0.003044578103733708, 0.003082792564882475, 0.003121338377694072, 0.003161638710430181, 0.003201313476817014, 0.003240615232175392, 0.003281804280556411, 0.003322718906042188, 0.00336547557978023, 0.003406498107174138, 0.003450457116355832, 0.003493661440634732, 0.003537812532676154, 0.003582257256063481, 0.003627688444208632, 0.003672516195862008, 0.003719684697600589, 0.003765101704282514, 0.003812927258173726, 0.003861439568680625, 0.003909448402566018, 0.003959404473274253, 0.004007989792379985, 0.004058735292505, 0.004110181633220411, 0.004161807767174452, 0.00421374913414041, 0.00426659461933071, 0.004320869516886483, 0.004375181769032843, 0.004430343231097367, 0.004485762918789327, 0.004541912093202427, 0.004598955975745778, 0.004656994547824533, 0.004716029462586535, 0.004774977153460203, 0.004835318024674184, 0.00489609126975088, 0.004957591596093658, 0.005020644495392967, 0.005082960222468125, 0.00514723810915918, 0.005211868255327601, 0.005277042964598078, 0.00534416030236029, 0.00541132681022205, 0.005479286747172574, 0.005547544698313867, 0.005617664267239975, 0.005688421100161656, 0.005759931617329239, 0.005832747817713146, 0.005905405017827769, 0.005980127832413751, 0.006055136078421897, 0.006131140229837184, 0.006208379761814098, 0.006286815891460074, 0.006365788407042716, 0.00644615711878993, 0.006526850411632479, 0.006609318780689599, 0.006692055149452925, 0.00677618341990871, 0.006861690591198233, 0.006947804141893554, 0.007035622323353197, 0.007123455877010909, 
0.007213534817930189, 0.007304101718345946, 0.007396332200926392, 0.00748896401169119, 0.00758358310371543, 0.007678942762044505, 0.007775700775533134, 0.007873172445979404, 0.007972345138932746, 0.008072632245671452, 0.008174110451274696, 0.008276820439378607, 0.0083812821751853, 0.008486135068948464, 0.008593393150260198, 0.008701341674032248, 0.008810852729937313, 0.008921406360728024, 0.009034161175036304, 0.009147300402369107, 0.009262781194137593, 0.009378954501673364, 0.009496841349962981, 0.009616534517922659, 0.009737422016949715, 0.00985994247762223, 0.009983958570933371, 0.01010982420959383, 0.01023689737496092, 0.01036577549650541, 0.0104961364525511, 0.01062796622054107, 0.01076191009495919, 0.01089718672467521, 0.01103428486785285, 0.01117316603801655, 0.01131376098466741, 0.01145595918632991, 0.0115999972879901, 0.01174609070781751, 0.01189400313172784, 0.0120433138982714, 0.01219512143300361, 0.01234850749491945, 0.01250392552513468, 0.01266144053210239, 0.01282057071437781, 0.01298197238385423, 0.01314525645228182, 0.01331043861538816, 0.01347818065120165, 0.0136476541245365, 0.01381950949355892, 0.01399331366768242, 0.01416927377787958, 0.01434738222186449, 0.01452826171858779, 0.01471094895961267, 0.01489613835253611, 0.01508346368937485, 0.01527340800382282, 0.01546547370246776, 0.01566022160534319, 0.01585719432401067, 0.01605684983072608, 0.01625874273127925, 0.01646364588392668, 0.01667064508062199, 0.01688041440743371, 0.01709288258315467, 0.01730805394555923, 0.01752590322628444, 0.0177463427383131, 0.01796986098804521, 0.01819595885858383, 0.01842505941975903, 0.01865692801760188, 0.01889170969503295, 0.01912943522295896, 0.01937034518107739, 0.0196140063654382, 0.01986100770646493, 0.02011097224140945, 0.02036410984438053, 0.02062036146554957, 0.02087998406635405, 0.02114290256336249, 0.02140915474873495, 0.02167853319146706, 0.02195142594463872, 0.02222784995932248, 0.02250765323936088, 0.02279104960311829, 0.02307798470756506, 
0.02336856266664977, 0.02366279639953683, 0.02396073008961253, 0.02426242922351562, 0.02456793333408092, 0.02487730814756794, 0.0251905505152803, 0.02550792886406406, 0.02582910187997411, 0.02615437063156945, 0.02648376759580229, 0.02681729619010431, 0.0271549451798121, 0.02749718611618723, 0.02784339007299019, 0.02819420641180548, 0.0285493051600918, 0.02890897717609908, 0.02927310731955801, 0.02964200610344525, 0.03001540533018687, 0.03039354308952147, 0.03077640393238923, 0.03116429534617549, 0.03155700969483588, 0.03195460491486601, 0.03235735092180025, 0.03276518967577191, 0.03317811786208935, 0.03359623437135184, 0.03401969218146572, 0.03444847111620497, 0.03488285710863925, 0.03532249116953068, 0.03576785712248176, 0.03621872706733287, 0.03667546067259009, 0.03713794057889785, 0.03760626578851767, 0.03808038839571637, 0.03856076602404368, 0.03904706278846426, 0.03953953992548076, 0.04003841608136582, 0.04054344748077376, 0.04105500306603549, 0.04157300785311069, 0.04209764115143244, 0.04262879948322951, 0.04316680879683599, 0.04371161171637716, 0.044263329024397, 0.04482207088297585, 0.04538797213334433, 0.04596104957183655, 0.04654139179570915, 0.04712909239369623, 0.04772432943136007, 0.04832708411271902, 0.04893761137857844, 0.04955585853620814, 0.0501819062634592, 0.05081606944971817, 0.05145824494746432, 0.05210871556954277, 0.05276737780726748, 0.05343448297086262, 0.05411010771860347, 0.05479429309162853, 0.05548732415445823, 0.05618918675436824, 0.05689998152834352, 0.05761997426691856, 0.05834908333418465, 0.05908761450846534, 0.05983548948568718, 0.06059311624468755, 0.06136033070637747, 0.06213745518392244, 0.06292452743819865, 0.06372172181261782, 0.06452912882805606, 0.06534694963569176, 0.06617531222886619, 0.06701430676999998, 0.06786409798495541, 0.06872486563749813, 0.06959674555504325, 0.07047988312902564, 0.07137440551931373, 0.07228041722766942, 0.07319823206477771, 0.07412794579988091, 0.0750695863860571, 0.0760235101711082, 
0.07698976479480812, 0.0779685841033988, 0.07896009259867931, 0.07996446839776183, 0.08098194731532438, 0.0820126732328319, 0.08305678009100248, 0.0841145517460955, 0.08518610195408449, 0.08627166295096345, 0.0873713939215559, 0.08848552522905173, 0.08961426342073864, 0.09075772247139646, 0.09191631136232381, 0.09309008352379422, 0.09427923541929317, 0.09548411445432059, 0.09670482940943437, 0.09794170981203365, 0.09919488167643209, 0.1004646382893455, 0.1017512095819377, 0.1030548582454571, 0.1043758210024386, 0.105714333702446, 0.1070706776533239, 0.1084450719031427, 0.1098378670441953, 0.1112492197836552, 0.1126795233010007, 0.1141289441921103, 0.1155978785936682, 0.1170865604833864, 0.1185952963932504, 0.1201243911567005, 0.1216741449525678, 0.1232448779310502, 0.1248369673147245, 0.1264506172307733, 0.1280862659256226, 0.1297442048748376, 0.131424826553463, 0.1331284361163743, 0.1348554188153557, 0.1366061378453693, 0.1383809985274276, 0.1401803093130827, 0.1420045702041044, 0.1438540547890224, 0.1457292943448341, 0.147630622833594, 0.149558512000814, 0.1515133819247225, 0.153495656601045, 0.1555058286744924, 0.1575443405591948, 0.1596116571292967, 0.1617082934472016, 0.163834729070679, 0.1659914697961469, 0.1681790457586807, 0.1703979809922274, 0.1726487898423912, 0.174932122123721, 0.1772483998216856, 0.1795983429584744, 0.1819824713909881, 0.1844014203260365, 0.1868558135620653, 0.1893462987639722, 0.1918735180913345, 0.1944381746669848, 0.1970409124374122, 0.1996824443727192, 0.202363540553236, 0.2050849029172426, 0.2078473057967933, 0.2106515311782644, 0.2134983792791729, 0.216388685793941, 0.2193232772460867, 0.2223030096318865, 0.2253288209892901, 0.2284015864966378, 0.231522248300859, 0.2346917982436025, 0.2379111893343087, 0.2411814524300198, 0.244503673505406, 0.2478788649484969, 0.2513081751414603, 0.2547927486094963, 0.2583337236191099, 0.2619323300529928, 0.2655897832580187, 0.2693073952462548, 0.2730864549942602, 0.276928305644887, 
0.2808343507358444, 0.2848060274666722, 0.2888448358655324, 0.2929522343335371, 0.2971298441079817, 0.3013792754099214, 0.3057021612106325, 0.3101002581172771, 0.3145753018442683, 0.3191291607064031, 0.3237636589557553, 0.3284808069491163, 0.3332825557389256, 0.3381710074096986, 0.3431482824157825, 0.3482165920860609, 0.3533782121832587, 0.3586354930870783, 0.3639908636523543, 0.3694468286391484, 0.37500598891382, 0.3806710210987418, 0.3864447016537724, 0.3923298676611324, 0.3983294989280566, 0.4044466382075598, 0.4106844732628474, 0.4170462420874799, 0.4235353648305207, 0.430155301963957, 0.4369097100274857, 0.4438023188853251, 0.4508370064731508, 0.4580177876764822, 0.4653488379612227, 0.4728344241273064, 0.4804790297352426, 0.4882872407657217, 0.4962638675617441, 0.5044138244413219, 0.5127422472792418, 0.5212544462628514, 0.529955920099944, 0.5388523618936318, 0.5479496648252322, 0.5572539750992291, 0.5667715977457775, 0.5765091274126846, 0.5864733434680076, 0.596671327448739, 0.6071103835303333, 0.6177980817537971, 0.6287422885069205, 0.6399511550905075, 0.6514331182345217, 0.6631969173393628, 0.6752516421476784, 0.6876066848494418, 0.7002718093155167, 0.7132570884920142, 0.726573038887679, 0.7402304630081176, 0.7542406672131889, 0.7686152677309934, 0.7833663588779487, 0.7985064683274103, 0.8140485601259471, 0.8300060680636534, 0.8463929141490922, 0.863223514263666, 0.8805127993018402, 0.8982762213205575, 0.916529772799322, 0.9352900252519447, 0.9545740999927546, 0.9743997302577441, 0.9947852542405116, 1.015749618424891, 1.037312417591082, 1.059493887292376, 1.082314963823677, 1.105797218058555, 1.129962958326132, 1.154835173981553, 1.180437576259465, 1.206794633412727, 1.233931512048116, 1.26187415089197, 1.290649247264668, 1.320284239683858, 1.350807340344517, 1.382247506673551, 1.414634476187132, 1.447998710910525, 1.482371440467261, 1.51778459772126, 1.554270845730301, 1.591863536455079, 1.630596667059976, 1.67050488059904, 1.711623407773502, 
1.753988014658944, 1.797634979049448, 1.842600988056518, 1.88892310459515, 1.936638669684285, 1.985785204533784, 2.036400345560367, 2.088521685636437, 2.142186670221022, 2.19743248008723, 2.254295808117391, 2.312812766115712, 2.373018644976582, 2.434947718471008, 2.498633022992536, 2.564106083991291, 2.631396674914669, 2.700532499304401, 2.771538864351044, 2.844438354977186, 2.919250405762853, 2.995990952797356, 3.074671948201456, 3.155300893529329, 3.237880337018609, 3.322407326924832, 3.40887280998551, 3.497261034958569, 3.587548851921724, 3.679705036830711, 3.773689522978628, 3.869452610960366, 3.966934138572455, 4.066062579731309, 4.166754137233298, 4.268911754463218, 4.372424111812222, 4.477164560911206, 4.582990031601653, 4.689739889947916, 4.797234764002425, 4.905275356118977, 5.013641186166353, 5.122089368344803, 5.230353318196265, 5.338141507040035, 5.445136160201201, 5.550992020392314, 5.655335110851254, 5.757761521216707, 5.857836272730836, 5.955092243356461, 6.049029161750383, 6.139112743653328, 6.224773911188777, 6.305408216475982, 6.380375407331158, 6.448999219857981, 6.510567407995993, 6.564332050293913, 6.609510161726831, 6.645284653698951, 6.670805682241507, 6.685192428572336, 6.687535346576354, 6.676898930769653, 6.652325047517408, 6.612836882682842, 6.557443532717045, 6.48514532227793, 6.394939853652758, 6.285828869045403, 6.156825942457658, 6.00696505671803, 5.835310085453912, 5.640965226452934, 5.423086397184917, 5.180893603839365, 4.913684299147661, 4.620847716127145, 4.301880138218465, 3.95640110389688, 3.584170453012803, 3.185106165132353, 2.759302869921783, 2.307050910852972, 1.828855816173871, 1.325457961982458, 0.7978522398433228, 0.2473074294148461, -0.3246150119308382, -0.9160430878643879, -1.524777766992315, -2.148278267091008, -2.783649675298605, -3.427633464481197, -4.076601545259852, -4.726554583895958, -5.373125347098632, -6.011587923150138, -6.636873754971784, -7.243595427390582, -7.826079267964472, -8.378407817025892, 
-8.89447325450661, -9.368042883155095, -9.792837692120406, -10.16262496178114, -10.47132570149606, -10.71313745329149, -10.88267268159556, -10.97511243193344, -10.98637433378377, -10.91329309270587, -10.75381052302971, -10.50717067970751, -10.17411379074808, -9.757060350908613, -9.260273797711086, -8.689986552612755, -8.054469729675933, -7.364021260110485, -6.630840457528299, -5.868748736622365, -5.092706125442669, -4.318060915593616, -3.559454765254705, -2.829287343068035, -2.135622338464148, -1.479389688866608, -0.8560816813766465, -0.2738095004802765, 0.2543739494261397, 0.7144910176289181, 1.091668842495992, 1.370182588359699, 1.533511967495648, 1.564430424255413, 1.444954047688553, 1.157845971553072, 0.6732169578320465, 0.08014078499353657, -0.008415727846234672, 0.0003493394600571776, -0.000532822153231509, -0.0004569584170021967, -0.000474581196647202, -0.0004815023484200967, -0.0004881148550268148, -0.0004934503984943032, -0.0004976656055584056, -0.0005008057535871855 + ], + "ultrasoft_cutoff_radius": 1.4, + "angular_momentum": 0, + "label": "2S" + }, + { + "cutoff_radius": 1.2, + "radial_function": [ + -0.001594556311448315, -0.00161461337712436, -0.001634922730255041, -0.001655487544249948, -0.001675235746875801, -0.001701728342914349, -0.001716020859916699, -0.001742183846875037, -0.00175950886436959, -0.00178780377631757, -0.001806742471556586, -0.001830661535279707, -0.001852149528031422, -0.001873784604379397, -0.001899582816431461, -0.001929448803049016, -0.001953912982645301, -0.00197591973451948, -0.001995303613910801, -0.00201250039198049, -0.002052465359137723, -0.002069740793849584, -0.002100750551629491, -0.002125147003376973, -0.002154397466973268, -0.002175073709871467, -0.00220744190026767, -0.0022361862757585, -0.002259269011773896, -0.002292442253305105, -0.002320538623217322, -0.00235228251219749, -0.002379282292828744, -0.002408226098884789, -0.002437580094915057, -0.002465844092169295, -0.00250525426136497, -0.002530321007743766, 
-0.002563268626747231, -0.002596562499724675, -0.002631145102778842, -0.002658738009642919, -0.002699423184111274, -0.002724234849581286, -0.002768211736537574, -0.0028021524445834, -0.002829175879653228, -0.002867874822093192, -0.002897210961571861, -0.002948459575177337, -0.002979112719033047, -0.003019615085644134, -0.00305157520333553, -0.003093872642757373, -0.003133670546674197, -0.003170046454383471, -0.003210120719746995, -0.00325091995969394, -0.003292171291874368, -0.003330773535901485, -0.003376052051593942, -0.003421364544645547, -0.003454830279152568, -0.003502666603294721, -0.003546553140705011, -0.003593969317833706, -0.00363715322650085, -0.003682800104494011, -0.003731336868148083, -0.003777845321371126, -0.003824801678477124, -0.003876736050956925, -0.003921152306575471, -0.003971046632905206, -0.004019395221073974, -0.004073144271784818, -0.004121096364737509, -0.00417526119036804, -0.004228386163553088, -0.004279217319413081, -0.004334082306258313, -0.004389777895189795, -0.00444366884509434, -0.004500894666408128, -0.004552999218355142, -0.004614357030110622, -0.004670059941221549, -0.004727886424455596, -0.004788098077010222, -0.004854365251231035, -0.004911392160197592, -0.004971543447309851, -0.005038161168579747, -0.005100161926151869, -0.005164770353817738, -0.005227517342238863, -0.005294287152553923, -0.005361553361705768, -0.005431805271122969, -0.005495580405576822, -0.005567114112720012, -0.005636245104216675, -0.005704486485669069, -0.005778865436508129, -0.005853195113213638, -0.005924965819980033, -0.006000619819260339, -0.006074436850134229, -0.006150940082831208, -0.006229231139538466, -0.006304237068443491, -0.00638597537104027, -0.006467376044360762, -0.006546988331795484, -0.00662876688849065, -0.006713636511450433, -0.006798651767825764, -0.006883014606624466, -0.00696903992649639, -0.007056028329453179, -0.007147642268927001, -0.007236216452869648, -0.007328076720484966, -0.00742058458188849, -0.007511847794988313, 
-0.007604815143409605, -0.007702842155538403, -0.007800617893789941, -0.007896569671615505, -0.00799705316661864, -0.008098796831264687, -0.008199405416218164, -0.008302850131386708, -0.00840617142410588, -0.008512457928811334, -0.00861808202736497, -0.008728118389348838, -0.00883841633806376, -0.008949028475301601, -0.009061645963649556, -0.009177330094604585, -0.009291967712382887, -0.009407695785772496, -0.009525752663330445, -0.009645380275531024, -0.009768122454225037, -0.009889664968875389, -0.01001533554800488, -0.01014099809653002, -0.01026984663201291, -0.01039810264793867, -0.01052900097341469, -0.01066118409009276, -0.01079563507592557, -0.01093067395633046, -0.01106806494300894, -0.01120689615452947, -0.01134951438731258, -0.01149025899856565, -0.01163488894827014, -0.01178426381414988, -0.01192996423365483, -0.01208127494272987, -0.01223232340820279, -0.01238607161354605, -0.01254157431194902, -0.01270070669533097, -0.01286031541979132, -0.01302083515893471, -0.01318514402063148, -0.01335054123646867, -0.0135197249246039, -0.01368804863799122, -0.01386170849664209, -0.01403577206827644, -0.0142126303428759, -0.01439132167527502, -0.01457282075009392, -0.01475507741622106, -0.0149419359894446, -0.0151281076261135, -0.01531898518212094, -0.01551240816415544, -0.01570679862192481, -0.01590538206727221, -0.01610385392501246, -0.01630684298919371, -0.0165124713239313, -0.01672009351925419, -0.0169298553953352, -0.01714260686065439, -0.01735902435551229, -0.01757733180961818, -0.01779860437328606, -0.01802204915089056, -0.01824840254822702, -0.01847781612190458, -0.0187104838636063, -0.0189464521040732, -0.01918425232835308, -0.01942595174821193, -0.0196702298592194, -0.01991752940787472, -0.02016899981991493, -0.02042156515024307, -0.02067890512483084, -0.02093885532062456, -0.02120172476752613, -0.021469408456401, -0.02173936187467007, -0.02201268385317523, -0.02228868957617136, -0.02256949549269771, -0.02285351867542917, -0.0231409161380951, 
-0.02343248263424746, -0.02372628355800011, -0.02402530477663489, -0.02432723474161069, -0.02463300630209142, -0.02494301983903369, -0.025257217881963, -0.02557476288333926, -0.02589686603221097, -0.02622206942472971, -0.02655240448874818, -0.02688586255563197, -0.02722397480904615, -0.02756678434258197, -0.02791326340866795, -0.02826493546914621, -0.02861958376185503, -0.0289802027149173, -0.02934451640457958, -0.02971407921575442, -0.03008731091834704, -0.030466315392226, -0.03084949303466722, -0.03123776035717318, -0.03163022522871373, -0.03202827854413171, -0.03243115152643106, -0.03283899651988453, -0.03325192181560022, -0.03367063825687389, -0.03409341591908843, -0.03452293367153669, -0.03495699028783794, -0.03539677645395378, -0.0358416469981433, -0.03629318868743936, -0.03674904184453132, -0.03721184073265094, -0.03767946346452122, -0.0381532985297719, -0.03863351925523606, -0.03911936751475809, -0.03961147945997001, -0.04010972105916522, -0.04061460854795262, -0.04112535503203585, -0.04164279277196403, -0.04216656171778266, -0.04269668961832096, -0.04323409145756596, -0.04377778474374239, -0.0443284777720363, -0.04488617365788716, -0.04545083633974678, -0.0460223883556834, -0.04660119441115951, -0.04718760177019132, -0.04778136564970836, -0.04838198303995389, -0.04899097416428932, -0.04960718862737812, -0.05023129089293366, -0.05086342370653218, -0.05150302916567595, -0.05215103615198613, -0.0528070001718759, -0.05347100326673537, -0.05414400225154781, -0.05482496224476474, -0.05551482107299166, -0.05621307442725176, -0.05692006495624202, -0.05763586871571613, -0.05836137658637996, -0.05909538989584862, -0.05983890791628044, -0.06059152154170766, -0.06135394835796903, -0.06212561164349095, -0.06290733185378672, -0.0636985902148357, -0.0645000682321566, -0.06531126696098033, -0.06613329365530696, -0.06696501929314724, -0.06780742821296437, -0.06866051215195723, -0.06952436589494315, -0.07039905105769684, -0.0712845354922993, -0.0721815658383913, 
-0.07308956745767903, -0.07400920054676074, -0.07494025483493294, -0.07588300916932234, -0.07683761485347906, -0.07780447978269134, -0.07878313866307929, -0.07977447289033934, -0.08077808007689624, -0.08179435544455682, -0.08282331464481008, -0.0838654148571778, -0.08492066065222037, -0.0859892126790851, -0.08707091593209038, -0.0881663828746766, -0.08927576342820309, -0.0903989682244779, -0.09153639494063981, -0.09268809692253344, -0.09385432682409922, -0.09503522668299569, -0.09623097543742842, -0.09744179080303628, -0.09866784549554986, -0.09990935936417555, -0.1011664546033, -0.1024396224258171, -0.1037285407757067, -0.1050337441728475, -0.1063554143438874, -0.1076936885887596, -0.1090486959773997, -0.1104212030541301, -0.1118105200361015, -0.113217651298164, -0.1146423060097446, -0.1160850207269251, -0.1175457944076852, -0.119025188092885, -0.1205230032350899, -0.1220397102265078, -0.1235754510416793, -0.1251307909192212, -0.1267056136342779, -0.1283001674177662, -0.1299149727487254, -0.1315501218149207, -0.1332057828720821, -0.1348822574225617, -0.136579925832506, -0.1382989368261462, -0.1400398493081925, -0.1418023723112967, -0.1435873267959504, -0.1453945980287052, -0.1472248538098029, -0.1490781285555087, -0.1509547487935041, -0.1528548428694272, -0.1547792189351851, -0.1567276325614682, -0.1587006316886619, -0.160698715533532, -0.1627217650709705, -0.1647704844044453, -0.1668449852908065, -0.1689457244617349, -0.1710727828764034, -0.1732268141337743, -0.1754079653714209, -0.1776166275676445, -0.1798531755911101, -0.1821180227369528, -0.1844114248085864, -0.1867337409628336, -0.1890853373952535, -0.1914666922193682, -0.1938780340908135, -0.1963199485662548, -0.1987926203647357, -0.2012964162246247, -0.2038320127712101, -0.2063995377631667, -0.2089996364508746, -0.2116324377326455, -0.2142985527987502, -0.2169983593117886, -0.2197321931436189, -0.2225007179686731, -0.2253042011776376, -0.2281430697566194, -0.2310179695577848, -0.2339290940865393, 
-0.2368771511334593, -0.2398623435143925, -0.2428855208785858, -0.2459467816630987, -0.2490468716892877, -0.2521861638768481, -0.2553652129821451, -0.25858447218876, -0.2618445421367532, -0.2651459303705934, -0.2684891000906262, -0.271874616031555, -0.2753030693308602, -0.2787749957516112, -0.2822909466116155, -0.2858514586857019, -0.2894570369037144, -0.2931084723534116, -0.2968062706219214, -0.3005508522071896, -0.3043430775582632, -0.3081834034852413, -0.3120725359703584, -0.3160110445075848, -0.3199995722793609, -0.3240388455692571, -0.3281294721529946, -0.3322720526519003, -0.3364673924931498, -0.3407160759197883, -0.3450188504131244, -0.3493763692890237, -0.353789391432489, -0.3582586436688793, -0.3627847389156749, -0.3673686869837756, -0.3720110308251741, -0.3767125095166399, -0.3814740678067619, -0.3862963410286469, -0.3911802617391338, -0.396126500167151, -0.4011359538157642, -0.4062094426721004, -0.4113478354050102, -0.4165519749378639, -0.4218227077209171, -0.4271609471183595, -0.4325675318761384, -0.4380434800986386, -0.4435895618820602, -0.4492068642090246, -0.4548961852915685, -0.4606586295625199, -0.466495098212107, -0.4724065933188086, -0.4783941213672342, -0.4844586924182295, -0.4906013483267841, -0.4968232146002265, -0.5031251967207133, -0.5095085145734473, -0.5159742053901795, -0.5225234444329026, -0.5291573021608933, -0.5358769611924785, -0.5426835843145312, -0.5495783967792072, -0.5565625059915433, -0.5636372797782663, -0.5708037952844442, -0.5780634788604395, -0.5854175054206543, -0.5928672398706674, -0.6004139877375465, -0.6080590700190882, -0.6158039053661604, -0.6236498572243541, -0.6315983263099376, -0.6396507902629054, -0.6478086890270838, -0.656073505209676, -0.6644467654737554, -0.6729299968252107, -0.6815247228992902, -0.690232658016857, -0.6990552231271643, -0.7079942444838387, -0.7170512938412372, -0.7262281054446404, -0.7355264089497169, -0.7449479755164141, -0.754494580413013, -0.7641680895705889, -0.77397030092256, 
-0.7839031155489202, -0.7939685197783604, -0.8041684213884857, -0.814504835106387, -0.8249797958853193, -0.8355953756507049, -0.846353702024107, -0.8572569046418437, -0.8683071611937443, -0.8795067705660635, -0.8908579599320987, -0.9023630557585243, -0.9140244632538916, -0.9258445464702163, -0.9378257854229588, -0.949970744342771, -0.9622819017560431, -0.9747619278540256, -0.9874135003025799, -1.000239299742944, -1.013242140751416, -1.026424829117147, -1.039790312421424, -1.053341517941618, -1.06708145823719, -1.081013236565141, -1.095140012825205, -1.109465042249596, -1.123991536899425, -1.138722935229147, -1.153662674401545, -1.16881423174847, -1.18418126069303, -1.199767398334721, -1.215576468298877, -1.231612247659392, -1.247878770217312, -1.26438000275747, -1.281120122255039, -1.298103340632153, -1.315334000889983, -1.332816540457948, -1.350555506116407, -1.368555557852915, -1.386821464996963, -1.405358133325841, -1.4241705771963, -1.443263951339986, -1.462643498554257, -1.482314657311285, -1.502282956696845, -1.522554123564667, -1.543133956832698, -1.56402850209215, -1.585243861922621, -1.606786402845756, -1.628662590493477, -1.650879089480938, -1.673442746506716, -1.696360620915534, -1.719639893022674, -1.743288027489321, -1.767312622419546, -1.791721568661934, -1.816522889458248, -1.841724895151124, -1.867336117978681, -1.893365330911557, -1.919821553197323, -1.946714055387687, -1.974052429236067, -2.001846459680053, -2.030106296384147, -2.058842313298058, -2.088065260933959, -2.117786159351752, -2.148016352280933, -2.178767545442952, -2.21005178779383, -2.241881469871942, -2.274269342893461, -2.307228578644597, -2.340772702761711, -2.374915682122449, -2.409671838326446, -2.445056021424278, -2.481083396959477, -2.517769729394288, -2.555131120695876, -2.593184228044379, -2.631946197777765, -2.671434665631892, -2.711667793174385, -2.752664286465887, -2.794443396921935, -2.837024941967528, -2.880429305517836, -2.924677456042119, -2.969790989035971, 
-3.015792079852631, -3.062703556699191, -3.110548881787992, -3.159352145814054, -3.209138110428157, -3.25993218693992, -3.311760504470939, -3.364649809694541, -3.418627590952354, -3.473721989706249, -3.529961841120468, -3.587376705735153, -3.645996776464187, -3.705852960757966, -3.76697684615992, -3.829400659274057, -3.89315729167204, -3.958280242641607, -4.024803646480308, -4.092762178061747, -4.162191089065061, -4.233126100919364, -4.305603417105431, -4.3796596459988, -4.455331717388333, -4.53265685911451, -4.611672493238444, -4.692416140058583, -4.774925359191798, -4.85923758521092, -4.945390053839049, -5.033419642417755, -5.123362709615179, -5.215254977129888, -5.309131287865171, -5.405025434667749, -5.502969972136356, -5.602995886447388, -5.705132438252036, -5.809406809184793, -5.915843807961153, -6.024465543213637, -6.135291025613355, -6.248335814397791, -6.363611561130739, -6.48112554322114, -6.60088019911538, -6.722872528155818, -6.847093598798352, -6.973527874681309, -7.102152568776461, -7.232936951060566, -7.365841600055603, -7.500817584801695, -7.637805656466611, -7.776735305127403, -7.917523854265738, -8.060075435796904, -8.204279942072077, -8.350011929729277, -8.49712943565271, -8.645472791542574, -8.794863337642028, -8.945102124969907, -9.095968542724137, -9.247218919323558, -9.398585068738242, -9.549772804018346, -9.700460445428973, -9.850297248546894, -9.99890189036637, -10.14586087678421, -10.29072702751321, -10.4330179022067, -10.57221432886351, -10.70775894956168, -10.83905481365889, -10.96546409476389, -11.08630690646297, -11.2008602287307, -11.30835704343184, -11.40798560034636, -11.49888897481389, -11.58016483453821, -11.65086552856653, -11.70999851056803, -11.7565271503646, -11.78937196965614, -11.80741235799894, -11.80948882139197, -11.79440582060928, -11.76093524288231, -11.70782057688763, -11.63378184405138, -11.53752135525946, -11.41773032451582, -11.27309644656179, -11.10231244172468, -10.90408567003288, -10.67714883291359, 
-10.42027182977671, -10.1322747902158, -9.81204233701959, -9.458539087096398, -9.070826398259598, -8.648080374163513, -8.189611102039365, -7.694883062571535, -7.163536699611214, -6.595411018707878, -5.990567137666844, -5.349312621536981, -4.672226434540761, -3.960184305288905, -3.214384211238396, -2.436371723811305, -1.628064808314306, -0.7917776894077946, 0.0697566849466682, 0.9533661645954155, 1.855421460565516, 2.771821490710678, 3.697982751682346, 4.628833566237454, 5.558814169733608, 6.481883640325577, 7.391534789458014, 8.28081823935658, 9.142376919863583, 9.96849236472976, 10.75114417802443, 11.48208407599229, 12.15292591665514, 12.75525302261586, 13.28074400563032, 13.72131806908005, 14.06930040801284, 14.3176078982678, 14.45995454293623, 14.49107532144829, 14.40696584350439, 14.20513374432637, 13.88485576852885, 13.44743201985656, 12.89642574377609, 12.23787311573675, 11.48044269072907, 10.63551824700413, 9.717171424237396, 8.74198168427515, 7.728650166957529, 6.697340741884883, 5.668665365696994, 4.662211070847692, 3.694481923720367, 2.776100000205346, 1.908073941613294, 1.084125674987159, 0.3149715830189579, -0.3820516101097153, -0.9883966327970812, -1.484345941068765, -1.849070080103191, -2.060703545366267, -2.096463935978529, -1.932583894008349, -1.546315756297657, -0.8980733432707229, -0.1068365855544979, 0.01118497012249267, -0.0004950852797053222, 0.0006818000976845335, 0.0005818817497560799, 0.0006065207194746521, 0.0006168636663311742, 0.000626761014881413, 0.0006349220329355104, 0.0006415553737168203, 0.0006467214302021968 + ], + "ultrasoft_cutoff_radius": 1.4, + "angular_momentum": 0, + "label": "2S" + }, + { + "cutoff_radius": 1.2, + "radial_function": [ + -2.083429826735684e-06, -2.136172104211737e-06, -2.190249558814624e-06, -2.24569599071504e-06, -2.302546055739121e-06, -2.360835287029462e-06, -2.42060011725448e-06, -2.481877901380049e-06, -2.544706940017544e-06, -2.609126503362995e-06, -2.675176855742232e-06, -2.7428992807774e-06, 
-2.812336107190576e-06, -2.883530735260611e-06, -2.956527663949712e-06, -3.031372518716746e-06, -3.108112080034643e-06, -3.186794312629733e-06, -3.267468395461245e-06, -3.350184752459792e-06, -3.434995084043944e-06, -3.521952399434704e-06, -3.611111049787996e-06, -3.702526762165907e-06, -3.796256674367942e-06, -3.892359370643996e-06, -3.990894918311442e-06, -4.091924905299159e-06, -4.195512478641992e-06, -4.301722383949767e-06, -4.410621005875343e-06, -4.522276409607244e-06, -4.636758383412602e-06, -4.754138482257126e-06, -4.874490072529302e-06, -4.997888377896769e-06, -5.124410526323577e-06, -5.254135598277707e-06, -5.387144676158933e-06, -5.523520894978026e-06, -5.66334949431879e-06, -5.806717871615635e-06, -5.953715636779842e-06, -6.104434668208683e-06, -6.258969170212456e-06, -6.417415731895234e-06, -6.579873387526229e-06, -6.746443678439487e-06, -6.917230716500531e-06, -7.092341249179714e-06, -7.271884726272875e-06, -7.455973368311051e-06, -7.644722236702005e-06, -7.8382493056474e-06, -8.036675535880543e-06, -8.2401249502708e-06, -8.44872471134197e-06, -8.662605200753095e-06, -8.881900100791199e-06, -9.106746477927219e-06, -9.337284868486963e-06, -9.57365936649099e-06, -9.816017713718132e-06, -1.006451139204891e-05, -1.03192957181468e-05, -1.058052994053616e-05, -1.084837733913788e-05, -1.11230053273247e-05, -1.140458555656005e-05, -1.169329402368599e-05, -1.198931118092688e-05, -1.229282204867816e-05, -1.260401633115006e-05, -1.292308853493907e-05, -1.325023809060105e-05, -1.358566947730181e-05, -1.39295923506234e-05, -1.428222167360581e-05, -1.464377785110595e-05, -1.50144868675581e-05, -1.539458042822149e-05, -1.578429610400391e-05, -1.618387747995135e-05, -1.659357430749673e-05, -1.701364266056294e-05, -1.744434509561741e-05, -1.788595081577869e-05, -1.833873583907738e-05, -1.880298317097652e-05, -1.927898298125958e-05, -1.976703278539598e-05, -2.026743763049843e-05, -2.07805102859874e-05, -2.130657143908243e-05, -2.18459498952426e-05, 
-2.239898278368055e-05, -2.296601576807982e-05, -2.354740326264601e-05, -2.414350865362765e-05, -2.475470452644485e-05, -2.53813728985674e-05, -2.602390545828887e-05, -2.668270380954455e-05, -2.735817972292776e-05, -2.805075539306023e-05, -2.87608637024778e-05, -2.94889484921967e-05, -3.023546483912915e-05, -3.100087934052175e-05, -3.178567040559464e-05, -3.259032855456328e-05, -3.34153567252302e-05, -3.426127058733832e-05, -3.512859886488174e-05, -3.601788366657659e-05, -3.692968082469703e-05, -3.786456024248946e-05, -3.882310625038144e-05, -3.980591797120815e-05, -4.081360969468474e-05, -4.184681126135801e-05, -4.290616845627868e-05, -4.399234341263896e-05, -4.510601502562855e-05, -4.624787937676754e-05, -4.741865016898118e-05, -4.861905917268866e-05, -4.984985668318517e-05, -5.111181198960196e-05, -5.240571385573925e-05, -5.373237101307013e-05, -5.509261266622646e-05, -5.648728901128e-05, -5.791727176714498e-05, -5.938345472043277e-05, -6.088675428409941e-05, -6.242811007023571e-05, -6.400848547735816e-05, -6.562886829256632e-05, -6.729027130894506e-05, -6.89937329585951e-05, -7.07403179616903e-05, -7.25311179919654e-05, -7.436725235905031e-05, -7.603707306022344e-05, -7.796841826311502e-05, -7.994655880500575e-05, -8.197373185087585e-05, -8.405902020189223e-05, -8.618942689231574e-05, -8.837870723041319e-05, -9.06212223494934e-05, -9.291603771529694e-05, -9.528122633657468e-05, -9.769582679556747e-05, -0.0001001724827440613, -0.0001028942633761066, -0.0001055029815875892, -0.0001081726570443511, -0.0001109123602261775, -0.0001137214816695939, -0.0001166030471308986, -0.0001195554891297041, -0.0001225821976410526, -0.0001256845941186905, -0.0001288669960119004, -0.0001321310922716281, -0.0001354768433580635, -0.0001389048094119457, -0.0001424240029018717, -0.0001460282201927502, -0.0001497304542444916, -0.000153519478137836, -0.0001574084530701744, -0.0001613911155443344, -0.0001654797564557736, -0.0001696648486922352, -0.0001739640341038752, 
-0.0001783680678767842, -0.000182885882816229, -0.0001875171648644161, -0.0001922623232649922, -0.0001971307639760315, -0.0002021221219487396, -0.0002072379277845667, -0.0002124857667215042, -0.0002178669369469967, -0.0002233803111001761, -0.000229035842611707, -0.0002348359515036157, -0.0002407800442907942, -0.0002468774240251755, -0.0002531281627351341, -0.0002595378492332167, -0.0002661070362416756, -0.0002728450075446001, -0.000279752842773099, -0.0002868348165006968, -0.0002941014486323358, -0.0003015437982977635, -0.0003091769577053115, -0.0003170081886757317, -0.0003250306237092979, -0.0003332612523800236, -0.0003417002867960745, -0.0003503478088952921, -0.0003592221493091025, -0.0003683137062978334, -0.0003776376767831715, -0.0003872005640724534, -0.0003970018110883886, -0.0004070501595143981, -0.00041735791302704, -0.0004279243389646428, -0.0004387583716289181, -0.000449867063512696, -0.0004612526449942388, -0.00047293065186046, -0.0004849055989598564, -0.00049717783982211, -0.0005097676033518572, -0.0005226728549988002, -0.0005359062918192518, -0.0005494736436194828, -0.0005633851169278845, -0.0005776456158425808, -0.0005922718047233625, -0.0006072644005639964, -0.0006226393278782418, -0.0006383992449889892, -0.0006545628459796846, -0.0006711341806992632, -0.0006881244929161682, -0.0007055452087012112, -0.0007234096125463823, -0.0007417192720358349, -0.0007605010756038171, -0.0007797504561691223, -0.0007994921130948455, -0.0008197341374225008, -0.0008404870958787223, -0.0008617606433373153, -0.0008835787044572912, -0.0009059478638764338, -0.0009288824648628566, -0.0009523959660574534, -0.0009765087991156017, -0.001001229098727668, -0.001026574432447284, -0.001052564242918356, -0.001079212116228132, -0.001106529749549284, -0.001134545475707274, -0.001163265757541117, -0.001192715834924054, -0.001222908823826854, -0.001253868783265996, -0.001285609899283564, -0.001318154275502843, -0.001351525881891675, -0.001385739824540719, -0.00142082248898035, 
-0.001456791106533279, -0.001493670235946252, -0.001531484460770574, -0.001570253459111196, -0.001610004653973611, -0.001650766177650444, -0.001692552643776668, -0.001735401415896602, -0.001779331755144626, -0.001824376843944354, -0.001870563831720555, -0.001917918689409472, -0.001966471530345113, -0.002016253161949138, -0.002067291313495618, -0.00211962997250021, -0.00217328892011264, -0.002228303410533069, -0.002284715600842221, -0.002342552766986752, -0.002401856144247657, -0.002462659374303683, -0.002525003455332837, -0.002588921217597993, -0.002654463813918002, -0.002721662466964206, -0.002790559181279733, -0.00286120489560887, -0.002933636169583198, -0.00300790188252158, -0.003084046078615363, -0.003162120650836288, -0.003242169753762421, -0.003324244489706481, -0.00340839900885315, -0.003494686311485338, -0.003583151802465104, -0.003673858960652119, -0.003766862407241492, -0.003862222650964009, -0.003959992097403762, -0.004060241055414187, -0.004163027708056487, -0.00426841620155618, -0.00437647032757504, -0.004487261541330046, -0.004600854224018203, -0.004717325449544668, -0.004836743932409661, -0.004959185014317236, -0.005084729062195656, -0.00521344763561785, -0.005345423834519589, -0.005480743230471461, -0.005619489009575072, -0.0057617451453322, -0.005907601049673451, -0.006057156994604249, -0.006210490400872307, -0.006367707655559928, -0.006528904331310316, -0.00669418383142582, -0.006863643948680278, -0.007037396075720336, -0.007215545804189814, -0.00739820558254314, -0.007585487850209882, -0.00777751284596763, -0.007974399307422804, -0.008176269256782696, -0.008383248317554172, -0.008595467248918421, -0.008813060107119033, -0.009036157506922927, -0.009264904671304183, -0.009499442291427233, -0.009739917264315862, -0.009986476819721608, -0.01023928323788217, -0.01049848162625707, -0.01076425010788438, -0.01103673655083269, -0.01131612992971556, -0.01160258944219591, -0.0118963006461949, -0.01219745038857386, -0.01250621917783593, -0.01282281074827275, 
-0.01314740773444498, -0.01348022955652164, -0.013821467105737, -0.01417134873220604, -0.01453008925608908, -0.01489790374802339, -0.01527503223404605, -0.01566170775872868, -0.01605817068856688, -0.01646466878933817, -0.01688145456742667, -0.0173087940778715, -0.01774694900791074, -0.01819619419577853, -0.01865681220474406, -0.01912909094717478, -0.01961332373535368, -0.0201098091136666, -0.02061892265392779, -0.02114075125541996, -0.0216760462124951, -0.02222453500418523, -0.02278734852715982, -0.02336390428791207, -0.02395586640498466, -0.02456150463465167, -0.02518375370220154, -0.02582125399205726, -0.0264749023043982, -0.02714471751565305, -0.02783209044434485, -0.02853671613036571, -0.02925873831914078, -0.02999971001398825, -0.03075896759163283, -0.031537547190308, -0.03233589950237886, -0.03315430702483819, -0.03399359589812733, -0.03485441302767166, -0.03573599491118629, -0.03664108247263041, -0.03756862220025725, -0.03851920691677016, -0.03949449138859699, -0.04049407824808322, -0.04151919081867619, -0.04256994999049647, -0.04364767231383211, -0.0447525916746106, -0.04588512679649763, -0.04704665352675301, -0.04823754708594462, -0.04945851232244118, -0.05071041347669387, -0.05199397032905055, -0.05331001394092089, -0.0546594230308511, -0.05604287977506001, -0.0574613740651163, -0.05891590329194415, -0.06040707869866668, -0.06193605860451902, -0.06350364692943603, -0.06511135491164204, -0.0667587580557587, -0.06844889968269871, -0.07018134310332808, -0.0719574104611638, -0.07377906574217788, -0.07564608521824434, -0.07756089196415733, -0.07952386336888587, -0.08153669073378458, -0.08360025727732134, -0.08571622877796983, -0.08788563956209097, -0.09010996847874378, -0.09239047318107849, -0.09472902522984741, -0.09712602063045955, -0.0995847649286811, -0.1021046856064156, -0.1046890485193208, -0.1073381947445344, -0.1100550718015139, -0.112840037194629, -0.1156959962796909, -0.1186235935126001, -0.1216260565834468, -0.124703793918076, -0.127859807504358, 
-0.1310951233598144, -0.1344130095778503, -0.13781466643335, -0.1413016103487837, -0.1448776711198161, -0.1485440302087271, -0.1523026140298179, -0.156156659994788, -0.16010847931251, -0.1641594059707444, -0.1683138090312572, -0.1725723733128937, -0.1769394137148112, -0.1814162216385642, -0.1860068201210154, -0.1907130512907604, -0.1955387116632414, -0.2004859119741556, -0.2055589373157958, -0.2107593948324125, -0.216092244631368, -0.2215589070883101, -0.2271648798862341, -0.2329116731093082, -0.238804372605265, -0.2448458877313083, -0.2510398898942917, -0.2573909059196112, -0.2639020359019869, -0.270578060117174, -0.2774228925596789, -0.2844404182758939, -0.2916356197026637, -0.2990123742806523, -0.3065758321167262, -0.3143305235119415, -0.3222807521982951, -0.3304322834649142, -0.3387896007683969, -0.3473581231708676, -0.356143227132043, -0.3651501101299227, -0.37438460160295, -0.3838525720326409, -0.3935594043593778, -0.4035116115826752, -0.4137151313906638, -0.4241763112041567, -0.4349016918947125, -0.4458981517511256, -0.4571718376184305, -0.4687305294237855, -0.4805808378833755, -0.4927304667807572, -0.505186671447653, -0.5179573386599954, -0.5310503604945641, -0.5444736727729882, -0.5582359307122233, -0.5723452704070662, -0.5868106165916724, -0.6016409369133802, -0.6168453535133231, -0.632433236002222, -0.6484142496338439, -0.6647982091954114, -0.6815954302678502, -0.6988159143969381, -0.7164707397381702, -0.734570307899818, -0.7531262443729141, -0.7721495973855412, -0.79165232811837, -0.8116463907864464, -0.832143933253999, -0.8531579147084032, -0.8747008105642862, -0.8967861779122892, -0.9194274754814202, -0.9426386248432326, -0.9664338010043403, -0.9908276439242543, -1.015835139926905, -1.041471482885679, -1.067752451366857, -1.094694118769436, -1.122312875872042, -1.150625684667635, -1.179649685974911, -1.209402750846321, -1.239902851910581, -1.271168560971395, -1.303219020291456, -1.33607356754204, -1.369752191766632, -1.404275324707136, 
-1.439663857329253, -1.475939206921018, -1.513123338064585, -1.551238616379778, -1.590308117713953, -1.63035526455998, -1.671404307862784, -1.713479685047984, -1.756606881036452, -1.800811495884279, -1.846120116685349, -1.892559762050021, -1.940158114016261, -1.988943472663437, -2.038944871758497, -2.090191912465096, -2.142715022920636, -2.196545057391513, -2.251713927999837, -2.308254010368297, -2.366198430349999, -2.42558123748839, -2.486437016822051, -2.54880127080695, -2.61271022340064, -2.678200915210324, -2.745311163933784, -2.814079654453445, -2.884545887787546, -2.956750112947819, -3.030733620090972, -3.106538368851597, -3.184207358025456, -3.263784308127796, -3.345313976661938, -3.42884193387258, -3.514414697838511, -3.602079655943035, -3.691885197411126, -3.783880561080251, -3.878115922177396, -3.974642469406825, -4.073512196525772, -4.174778152836162, -4.278494242405267, -4.38471530588099, -4.493497182091735, -4.604896479372085, -4.718970859777503, -4.835778816595463, -4.955379684769774, -5.077833721985614, -5.203202012832916, -5.331546444014002, -5.46292968017381, -5.597415197140341, -5.73506711616953, -5.875950329122496, -6.020130294162277, -6.16767308046151, -6.318645325832742, -6.473114082697871, -6.631146843192962, -6.792811419140394, -6.958175877680425, -7.127308426627852, -7.300277352514991, -7.47715088219636, -7.657997080136932, -7.842883737753935, -8.031878182604816, -8.225047211200598, -8.42245683594917, -8.624172182698448, -8.830257284606553, -9.040774822681357, -9.255786016169502, -9.475350274738698, -9.699525017988655, -9.92836537260353, -10.16192388713451, -10.40025020757417, -10.6433907946101, -10.89138845313154, -11.14428208693564, -11.40210617163406, -11.66489040115128, -11.93265916490348, -12.20543108662388, -12.48321847612021, -12.7660267875706, -13.05385399895337, -13.34669000575158, -13.64451593380055, -13.94730342556624, -14.25501392376968, -14.56759781838485, -14.88499366903011, -15.20712728896443, -15.53391081374053, 
-15.86524174272577, -16.20100188554348, -16.54105629987285, -16.88525215885031, -17.23341755347717, -17.58536027016384, -17.94086648497716, -18.29969943570115, -18.66159799471316, -19.02627525217395, -19.39341696761201, -19.76268005025847, -20.13369093546517, -20.50604393070667, -20.87929951622089, -21.25298263616513, -21.62658087172829, -21.99954269836748, -22.37127562326275, -22.74114435686119, -23.10846896111634, -23.47252300560951, -23.83253174516197, -24.18767028453502, -24.53706186205671, -24.87977610120915, -25.21482740029523, -25.54117339173116, -25.85771352540107, -26.16328778565092, -26.45667558524229, -26.73659486737051, -27.00170138496721, -27.25058832118079, -27.48178613977246, -27.6937628119424, -27.88492443057665, -28.05361620930216, -28.19812401686734, -28.31667639809718, -28.40744718182792, -28.46855875370237, -28.49808599010304, -28.49406098553988, -28.45447858496644, -28.37730281070041, -28.26047424697933, -28.10191843896168, -27.89955540569564, -27.65131029025469, -27.35512522446174, -27.00897251005713, -26.61086911879465, -26.1588926003546, -25.65119843392024, -25.08603886528468, -24.4617832560438, -23.77693997903896, -23.03017982632509, -22.22036097499896, -21.34655541101303, -20.40807681406251, -19.40450981595659, -18.33574047909235, -17.20198793214568, -16.00383691708171, -14.74227110958757, -13.4187068689503, -12.03502720013208, -10.59361550055644, -9.097388718457026, -7.549829434711214, -5.955016301495192, -4.317652238375487, -2.643089689128246, -0.9373521386989694, 0.792848998421988, 2.540103014978541, 4.296295235700342, 6.052610100970753, 7.799543337064293, 9.526924549480043, 11.22395168197886, 12.87923892261705, 14.48087962072768, 16.01652591663176, 17.47348676074739, 18.83884596825564, 20.09960191833335, 21.24283034978669, 22.2558714328718, 23.12654199128667, 23.8433732136258, 24.39587348629846, 24.77481507425735, 24.97254216608681, 24.98329616338972, 24.80355218827253, 24.43235817948819, 23.87166478846598, 23.12663043274569, 
22.20588086964604, 21.1216968343897, 19.89009584980396, 18.530765395207, 17.06679373634221, 15.52413124511997, 13.9306988671506, 12.31504036497559, 10.70439063112078, 9.122002501010577, 7.583537987148581, 6.092285182306387, 4.64265292673338, 3.253050868283164, 1.951607029473241, 0.7688583133504476, -0.2622595012459241, -1.106452057974111, -1.726138124842932, -2.08158826110864, -2.130833135002339, -1.832280642962495, -1.121539645771771, -0.139404713261165, 0.01396323001121204, -0.001222447501044017, 0.0003018596311060253, 0.0001683929399766977, 0.0001982313325329802, 0.0002108507003279493, 0.0002241757621935156, 0.0002364564942668372, 0.0002478943963606954, 0.0002584977401777833 + ], + "ultrasoft_cutoff_radius": 1.4, + "angular_momentum": 1, + "label": "2P" + }, + { + "cutoff_radius": 1.2, + "radial_function": [ + 3.620594448309543e-06, 3.712250234150883e-06, 3.806226297309459e-06, 3.902581375887864e-06, 4.001375694950989e-06, 4.10267100416863e-06, 4.206530616411061e-06, 4.313019447321679e-06, 4.422204055891358e-06, 4.534152686060048e-06, 4.648935309371435e-06, 4.766623668707457e-06, 4.887291323129961e-06, 5.011013693857527e-06, 5.137868111406208e-06, 5.26793386392361e-06, 5.401292246746606e-06, 5.538026613213588e-06, 5.678222426763015e-06, 5.821967314350909e-06, 5.969351121220513e-06, 6.120465967058577e-06, 6.275406303573146e-06, 6.434268973528986e-06, 6.597153271277524e-06, 6.764161004819016e-06, 6.935396559435945e-06, 7.110966962937231e-06, 7.290981952554098e-06, 7.475554043529546e-06, 7.66479859944395e-06, 7.858833904321191e-06, 8.057781236559996e-06, 8.261764944736962e-06, 8.470912525328533e-06, 8.685354702400432e-06, 8.905225509314534e-06, 9.130662372504141e-06, 9.361806197370009e-06, 9.598801456350947e-06, 9.841796279223724e-06, 1.009094254568912e-05, 1.034639598030182e-05, 1.060831624980327e-05, 1.087686706291879e-05, 1.115221627268085e-05, 1.143453598134275e-05, 1.172400264794828e-05, 1.202079719862441e-05, 1.232510513966611e-05, 1.263711667348397e-05, 
1.295702681748689e-05, 1.328503552597453e-05, 1.362134781511529e-05, 1.396617389108831e-05, 1.431972928146918e-05, 1.468223496994188e-05, 1.505391753442095e-05, 1.543500928867013e-05, 1.582574842750627e-05, 1.622637917567895e-05, 1.663715194051903e-05, 1.70583234684517e-05, 1.749015700547132e-05, 1.793292246167924e-05, 1.83868965799863e-05, 1.885236310908656e-05, 1.932961298080966e-05, 1.981894449196286e-05, 2.032066349077669e-05, 2.083508356807008e-05, 2.136252625325528e-05, 2.190332121530441e-05, 2.245780646880368e-05, 2.302632858522386e-05, 2.360924290953894e-05, 2.420691378232875e-05, 2.481971476750411e-05, 2.544802888579671e-05, 2.609224885416017e-05, 2.675277733123105e-05, 2.743002716900438e-05, 2.812442167087986e-05, 2.883639485624078e-05, 2.956639173173111e-05, 3.03148685693993e-05, 3.108229319188394e-05, 3.186914526481881e-05, 3.267591659664004e-05, 3.350311144598334e-05, 3.435124683686239e-05, 3.522085288182679e-05, 3.611247311330031e-05, 3.702666482330713e-05, 3.796399941179874e-05, 3.892506274379787e-05, 3.991045551558466e-05, 4.092079363015215e-05, 4.195670858216696e-05, 4.301884785267511e-05, 4.410787531379981e-05, 4.522447164368462e-05, 4.636933475194046e-05, 4.754318021586357e-05, 4.874674172769612e-05, 4.998077155320882e-05, 5.124604100189349e-05, 5.254334090905824e-05, 5.387348213012705e-05, 5.523729604745273e-05, 5.663563508995993e-05, 5.806937326594273e-05, 5.953940670935109e-05, 6.10466542399053e-05, 6.259205793739138e-05, 6.417658373049354e-05, 6.580122200053394e-05, 6.746698820049612e-05, 6.9174923489719e-05, 7.092609538465877e-05, 7.272159842612407e-05, 7.456255486340362e-05, 7.64501153557119e-05, 7.838545969139228e-05, 8.036979752532768e-05, 8.240436913501778e-05, 8.449044619579738e-05, 8.662933257567979e-05, 8.882236515032115e-05, 9.107091463861735e-05, 9.337638645945173e-05, 9.57402216101349e-05, 9.816389756707949e-05, 0.0001006489292092784, 0.0001031968697651596, 0.0001058093117834112, 0.0001084878881283832, 0.0001112342730006895, 
0.0001140501829836446, 0.0001169373781161937, 0.0001198976629930019, 0.0001229328878923945, 0.0001260449499328515, 0.0001292357942587778, 0.0001321200103227642, 0.0001354764011705571, 0.0001389139419830985, 0.0001424366032754115, 0.0001460608157885702, 0.0001497628103064099, 0.0001535675187183279, 0.000157464562072394, 0.0001614521230215404, 0.0001655629854487833, 0.0001697588661352558, 0.0001740626584560228, 0.0001788158590720591, 0.0001833497560955885, 0.0001879886626220359, 0.000192749946171807, 0.000197631718532794, 0.0002026394587648708, 0.0002077703417714358, 0.0002130298645554143, 0.0002184213519175299, 0.0002239515047075834, 0.0002296240698204316, 0.0002354384694775237, 0.0002413950443886223, 0.0002475112521234778, 0.0002537742432479989, 0.0002602085636765383, 0.0002667930349477364, 0.000273551194430501, 0.0002804724413563857, 0.0002875775650035087, 0.0002948500785180359, 0.0003023217129820607, 0.0003099746719607782, 0.0003178263067946212, 0.000325874328162944, 0.0003341203219170941, 0.0003425809343059111, 0.0003512545916885866, 0.0003601451592602041, 0.0003692646609764461, 0.0003786162544359173, 0.000388197341374115, 0.0003980251676326375, 0.0004081052234365776, 0.0004184343129581562, 0.0004290306941171944, 0.0004398933036852977, 0.0004510319914334877, 0.000462448111483131, 0.0004741571450436933, 0.0004861616261604995, 0.0004984687945083854, 0.0005110969859377461, 0.0005240303717980212, 0.0005372948675058174, 0.0005509044653874253, 0.0005648456917692695, 0.0005791488024167201, 0.000593814663823806, 0.0006088418000590231, 0.0006242641961193502, 0.0006400633266552998, 0.0006562662319018089, 0.0006728852739658238, 0.0006899175014161384, 0.0007073793266761821, 0.0007252926343552816, 0.0007436546805603052, 0.0007624825441513703, 0.0007817870634133923, 0.0008015727301716538, 0.0008218668668192588, 0.0008426769840528562, 0.0008640036967938693, 0.0008858821778910647, 0.0009083091099131723, 0.0009313064384401542, 0.0009548835361053974, 0.0009790594434596022, 
0.001003840866861682, 0.001029258611242427, 0.001055312788018811, 0.001082031199620358, 0.00110941892832808, 0.001137507997471094, 0.00116630578539003, 0.001195831724923237, 0.001226105168394419, 0.001257150754046197, 0.001288968576859552, 0.001321608232280143, 0.001355059595411324, 0.001389366627891102, 0.001424543862325108, 0.001460608142054164, 0.001497577134338399, 0.00153549287590552, 0.001574366002155129, 0.001614221935880292, 0.001655083514410056, 0.001696987084248308, 0.001739946038800998, 0.001783991027075174, 0.001829156388757154, 0.001875465268532342, 0.001922937591086473, 0.001971623779615819, 0.002021533725708125, 0.002072712365990319, 0.002125181566188529, 0.002178984081812696, 0.002234143741918856, 0.002290699240025963, 0.002348692796691322, 0.00240814965790233, 0.00246911667492001, 0.002531623018636006, 0.002595711559799058, 0.002661425435371119, 0.002728798101416405, 0.002797877668247361, 0.002868713659380764, 0.002941329825377096, 0.003015792694980654, 0.003092134629744734, 0.003170414201859898, 0.003250678248592745, 0.003332971685346246, 0.003417346987604004, 0.003503857437917959, 0.003592551233432264, 0.003683505988718233, 0.003776754534365333, 0.003872358536227938, 0.003970391797795951, 0.004070901293458253, 0.004173958869091523, 0.004279622721516874, 0.004387964477715374, 0.004499040402466417, 0.004612940812702289, 0.004729718781413629, 0.004849447121439397, 0.004972215616280262, 0.005098086568852836, 0.005227145538632979, 0.005359468915433088, 0.005495146943021082, 0.005634256304974463, 0.005776885592920474, 0.005923129406555526, 0.006073079845154572, 0.006226814772811494, 0.006384445557893765, 0.006546066756722039, 0.006711783956748117, 0.006881687332192989, 0.007055899916096967, 0.007234522595410289, 0.007417666664518568, 0.007605442940197878, 0.007797975758455086, 0.007995376614421934, 0.008197780239769924, 0.008405305292033375, 0.008618082964629691, 0.008836253334014832, 0.009059939901544568, 0.00928928770573937, 0.009524445188666117, 
0.00976555730367053, 0.01001276943403462, 0.01026623706864768, 0.01052613537322136, 0.01079259717319275, 0.01106580856764347, 0.01134593533712508, 0.01163315740946063, 0.01192764400090469, 0.01222958951492316, 0.01253917703705518, 0.01285660205684526, 0.01318205997021455, 0.01351575955684363, 0.01385790765833576, 0.01420871585518471, 0.01456840238373448, 0.01493719491384039, 0.01531532593137193, 0.01570302259171918, 0.01610053730057519, 0.0165081146699096, 0.01692600992320321, 0.01735447832792987, 0.0177938030290431, 0.01824423474907266, 0.01870608474091046, 0.01917960814508055, 0.01966513479879722, 0.02016294056523659, 0.0206733479328633, 0.02119668209081093, 0.02173325613796444, 0.02228342471504444, 0.02284750432040784, 0.02342587749104007, 0.0240188741545071, 0.02462689304043752, 0.02525030645646958, 0.02588948790094375, 0.02654485494559686, 0.02721681268211996, 0.02790577852911399, 0.02861218269753617, 0.02933646445803842, 0.03007908627730679, 0.03084050250056722, 0.03162119077905056, 0.03242164202020907, 0.03324235705651619, 0.03408384516854535, 0.03494662446604802, 0.03583135640263904, 0.03673816137822995, 0.03766840066699159, 0.03862152183827179, 0.03959959197841886, 0.04060147826130119, 0.04163023983109318, 0.04268260535123575, 0.04376398825720756, 0.04487181534105933, 0.04600770757445902, 0.0471716463909507, 0.04836616972110434, 0.04959065075075716, 0.05084531736160824, 0.052132982932344, 0.05345237635862413, 0.05480536165993637, 0.05619270198024669, 0.05761488339656853, 0.05907336893337084, 0.06056929068798204, 0.06210119253539219, 0.06367405943431076, 0.06528589753710956, 0.0669377329838885, 0.06863256294727471, 0.07036957041902944, 0.07215096586334965, 0.07397689789110026, 0.07584971386489042, 0.0777697857884885, 0.07973780630651478, 0.08175624575648974, 0.08382569820851171, 0.08594741486472032, 0.08812287340775113, 0.09035334806109985, 0.0926402707404452, 0.09498517567824288, 0.09738923271369312, 0.09985417454738851, 0.1023817457634909, 
0.1049729814995387, 0.1076299061359968, 0.1103539175658031, 0.1131476842593527, 0.1160103159436081, 0.118947336823867, 0.1219577932666455, 0.125044033064348, 0.1282095683168541, 0.1314538268017481, 0.1347811971603998, 0.1381922117228285, 0.1416898844583372, 0.1452756932787747, 0.1489525804926879, 0.1527223059696474, 0.156587466357052, 0.1605502117203114, 0.1646138654457979, 0.1687789719895723, 0.1730514991433464, 0.1774302024715232, 0.1819209596178053, 0.1865241877883924, 0.1912452032653265, 0.1960844314453287, 0.2010470664264765, 0.2061341174044967, 0.211351316892553, 0.216699232149743, 0.2221831962913284, 0.2278048767046393, 0.2335700974361901, 0.2394808442844572, 0.2455396911828928, 0.2517534570967961, 0.2581240971671018, 0.264654906463896, 0.2713516066710815, 0.2782182028239899, 0.2852569067847348, 0.292475466176498, 0.2998749089358688, 0.3074628714837898, 0.3152414782569577, 0.3232178144996818, 0.3313950004282891, 0.3397796880815653, 0.3483754911212786, 0.3571899339248775, 0.3662256761258229, 0.3754914945694137, 0.3849896722158477, 0.3947299518871448, 0.4047147776961287, 0.4149531090877706, 0.4254499389938073, 0.4362116336879277, 0.4472461072471089, 0.4585586666683159, 0.4701576899918951, 0.4820499337418702, 0.4942421187839253, 0.5067429557154965, 0.5195591089615423, 0.5326995854698835, 0.5461722282979644, 0.5599844547508899, 0.5741463612213218, 0.5886656716898762, 0.603551808659256, 0.6188141232663502, 0.6344615967574754, 0.6505043822738084, 0.6669526783100783, 0.6838157817754584, 0.7011050388560864, 0.718830739415818, 0.7370038950291036, 0.7556358671219996, 0.7747386094983881, 0.7943227291285835, 0.8144017984748149, 0.8349872386848436, 0.8560924533890879, 0.8777300008196367, 0.8999135870459947, 0.9226568995337252, 0.945973693288628, 0.969879017327345, 0.9943869477663858, 1.019512990544086, 1.045272700025955, 1.071681898872047, 1.0987568378059, 1.126514279652258, 1.15497124762681, 1.184145650060073, 1.214054802089245, 1.244717951391952, 1.27615309785022, 
1.308380441600395, 1.341419106480539, 1.375289853390778, 1.410013406338942, 1.445610811716064, 1.482104573443552, 1.519516270751802, 1.55786944469041, 1.597187407644877, 1.637494300271793, 1.678814682220663, 1.721173933059764, 1.764598033984245, 1.809113299804817, 1.854747087089295, 1.901527248359942, 1.949482177827341, 1.998641276680575, 2.049034228272472, 2.100692014041234, 2.153645754333263, 2.207927622358915, 2.263570791696289, 2.320608743774972, 2.379076107518136, 2.439008268627903, 2.500441410515829, 2.563412618718095, 2.627959938401423, 2.694122088701095, 2.761939034778082, 2.831451323128028, 2.902700956541573, 2.975730221593624, 3.050583263813685, 3.127304377619464, 3.205939601118025, 3.286535702075333, 3.369140593476173, 3.453803258266351, 3.540573953301531, 3.629503904879916, 3.720645786333812, 3.81405297776583, 3.909780724916456, 4.007885049421448, 4.108423271565989, 4.21145432760828, 4.317038057705668, 4.425235903243497, 4.536110551112676, 4.649726099908911, 4.766147994718768, 4.885443184745103, 5.007680033905112, 5.132928191676816, 5.261259133478982, 5.392745475052109, 5.527461654811761, 5.665483345880173, 5.8068880410719, 5.95175463517756, 6.100163678028887, 6.252197229976536, 6.407939103476517, 6.567474589020863, 6.730890613645466, 6.898275882521455, 7.069720502467574, 7.245316441090749, 7.4251571623549, 7.609337779396553, 7.797955173334453, 7.991107574378592, 8.188895091562095, 8.391419308043385, 8.59878330697848, 8.811091826534039, 9.028451092038466, 9.250968778009987, 9.478753974017266, 9.711917254087513, 9.950570385140889, 10.19482656776012, 10.44480007388402, 10.70060634582636, 10.96236192902595, 11.23018421314045, 11.50419148966931, 11.78450275747579, 12.07123762197941, 12.36451611096766, 12.6644585845067, 12.97118550889396, 13.28481729821751, 13.60547414123805, 13.93327568675801, 14.26834095601259, 14.6107879178538, 14.96073334253966, 15.31829246843366, 15.68357857397691, 16.05670282798781, 16.43777370591731, 16.82689671819771, 
17.22417391499371, 17.62970343072611, 18.04357895881897, 18.46588930711127, 18.89671761471075, 19.3361409858604, 19.78422961125433, 20.24104621506759, 20.70664519253658, 21.18107187270877, 21.66436161847994, 22.15653895292593, 22.65761654047352, 23.16759420439048, 23.68645779969812, 24.21417804472687, 24.75070935372057, 25.29598842574843, 25.84993297672325, 26.41244022669009, 26.98338536137521, 27.56261995319639, 28.1499702133851, 28.74523523061325, 29.34818509196339, 29.95855888163906, 30.57606264662587, 31.20036720785537, 31.83110593416863, 32.46787234147222, 33.11021770419214, 33.75764843483984, 34.40962350767163, 35.06555169295935, 35.72478873356312, 36.38663442750901, 37.05032969116331, 37.71505339374934, 38.37991933238438, 39.043972979187, 39.70618826481031, 40.36546430763185, 41.02062212772304, 41.67040137416355, 42.31345700076392, 42.94835611878077, 43.57357475653374, 44.18749482431009, 44.78840115708746, 45.37447873309878, 45.94381007147109, 46.49437289343227, 47.02403808853688, 47.53056792697052, 48.01161481595977, 48.46472037914626, 48.88731513332846, 49.27671876602633, 49.63014100572033, 49.94468335818485, 50.21734160333655, 50.44500924115291, 50.62448201590683, 50.75246350925715, 50.82557204178107, 50.84034889544672, 50.79326802443359, 50.68074736283431, 50.49916183506751, 50.24485825464263, 49.91417215139496, 49.5034466797061, 49.00905379796418, 48.42741773290474, 47.75504090907177, 46.98853241580085, 46.12463911705665, 45.16027946600862, 44.09258011832804, 42.9189153110725, 41.63694912150036, 40.24468046620191, 38.74049088485372, 37.1231949975224, 35.39209340830745, 33.54702799452679, 31.58843919627384, 29.51742511798041, 27.33580188452788, 25.04616492934196, 22.65195051007024, 20.15749685208819, 17.56810411067201, 14.89009220096588, 12.13085548737391, 9.29891314396761, 6.403953806438579, 3.456872972656157, 0.4698015883260496, -2.54387623921437, -5.569518296014621, -8.59123446688497, -11.59191450151602, -14.55327596472893, -17.45593527218304, 
-20.27950470658593, -23.00271859439502, -25.60359181462138, -28.05961379853611, -30.34798116298526, -32.44587188680654, -34.33076351086666, -35.98079735089718, -37.37518981762449, -38.49469073247889, -39.32208696113921, -39.84274758287285, -40.04520391656573, -39.92175435257519, -39.4690792832036, -38.68884574811196, -37.58827446319845, -36.18063284438844, -34.48560706251584, -32.5294926052446, -30.34512650970535, -27.97146449959422, -25.45268153366126, -22.8366445753297, -20.17256957019146, -17.50762985388188, -14.8822282651898, -12.32357803356743, -9.837155261888931, -7.412707891284512, -5.082330809062732, -2.895460159427415, -0.9060196610415332, 0.8275713312371806, 2.242443615612908, 3.271380087100944, 3.843037981172657, 3.881758495407345, 3.312236650221531, 2.017923249336611, 0.2499743108911748, -0.02514493936175287, 0.00209852092626301, -0.0006335094457909671, -0.0003917283574734506, -0.000442914080207676, -0.0004632353885380587, -0.0004848511423934062, -0.0005046257756142372, -0.0005229239569980314, -0.0005397632283917746 + ], + "ultrasoft_cutoff_radius": 1.4, + "angular_momentum": 1, + "label": "2P" + } + ], + "D_ion": [ + 0.6740554057380005, 0.40423205754159186, 0.0, 0.0, 0.40423205754159186, 0.19966364386046154, 0.0, 0.0, 0.0, 0.0, 1.030403004439778, 0.722651316841888, 0.0, 0.0, 0.722651316841888, 0.5058896267883665 + ], + "local_potential": [ + -15.06298851924158, -15.06298851988472, -15.062988520544145, -15.06298852122027, -15.06298852191351, -15.0629885226243, -15.062988523353084, -15.06298852410032, -15.06298852486648, -15.06298852565203, -15.062988526457465, -15.062988527283295, -15.062988528130035, -15.06298852899821, -15.06298852988836, -15.06298853080104, -15.06298853173683, -15.062988532696306, -15.06298853368007, -15.06298853468874, -15.06298853572294, -15.062988536783324, -15.06298853787055, -15.0629885389853, -15.06298854012827, -15.062988541300175, -15.06298854250174, -15.062988543733725, -15.0629885449969, -15.062988546292045, 
-15.062988547619986, -15.062988548981535, -15.062988550377556, -15.062988551808916, -15.06298855327651, -15.062988554781255, -15.062988556324095, -15.062988557905985, -15.06298855952793, -15.062988561190926, -15.062988562896026, -15.06298856464428, -15.0629885664368, -15.0629885682747, -15.06298857015912, -15.062988572091244, -15.06298857407228, -15.062988576103464, -15.062988578186076, -15.062988580321395, -15.062988582510775, -15.062988584755585, -15.06298858705721, -15.06298858941711, -15.06298859183675, -15.06298859431764, -15.06298859686133, -15.062988599469415, -15.06298860214353, -15.06298860488533, -15.062988607696544, -15.062988610578925, -15.06298861353427, -15.06298861656443, -15.062988619671295, -15.062988622856816, -15.062988626122975, -15.062988629471816, -15.062988632905435, -15.062988636425976, -15.06298864003563, -15.06298864373667, -15.0629886475314, -15.0629886514222, -15.062988655411486, -15.062988659501764, -15.062988663695585, -15.06298866799557, -15.062988672404416, -15.06298867692487, -15.06298868155975, -15.06298868631197, -15.06298869118449, -15.06298869618036, -15.062988701302695, -15.062988706554705, -15.062988711939665, -15.062988717460955, -15.062988723122006, -15.062988728926365, -15.06298873487767, -15.062988740979625, -15.06298874723605, -15.06298875365086, -15.06298876022806, -15.06298876697176, -15.06298877388617, -15.062988780975624, -15.062988788244555, -15.062988795697486, -15.06298880333909, -15.062988811174135, -15.06298881920753, -15.062988827444295, -15.062988835889564, -15.062988844548626, -15.06298885342689, -15.06298886252991, -15.06298887186337, -15.062988881433105, -15.0629888912451, -15.06298890130548, -15.062988911620534, -15.062988922196725, -15.06298893304064, -15.06298894415907, -15.06298895555896, -15.06298896724744, -15.06298897923181, -15.06298899151957, -15.06298900411838, -15.062989017036134, -15.062989030280894, -15.06298904386095, -15.06298905778478, -15.06298907206109, -15.0629890866988, 
-15.06298910170706, -15.062989117095254, -15.062989132872994, -15.062989149050145, -15.062989165636814, -15.06298918264338, -15.062989200080455, -15.06298921795895, -15.06298923629003, -15.06298925508516, -15.06298927435608, -15.062989294114844, -15.062989314373795, -15.06298933514559, -15.06298935644322, -15.06298937827999, -15.062989400669554, -15.062989423625906, -15.062989447163385, -15.062989471296705, -15.06298949604095, -15.0629895214116, -15.06298954742449, -15.062989574095885, -15.062989601442455, -15.06298962948129, -15.06298965822992, -15.062989687706304, -15.062989717928875, -15.062989748916515, -15.06298978068859, -15.062989813264965, -15.06298984666599, -15.062989880912545, -15.06298991602603, -15.06298995202839, -15.06298998894214, -15.06299002679033, -15.062990065596635, -15.062990105385285, -15.062990146181164, -15.062990188009765, -15.062990230897226, -15.062990274870355, -15.06299031995663, -15.06299036618423, -15.062990413582055, -15.0629904621797, -15.062990512007575, -15.06299056309679, -15.062990615479285, -15.062990669187805, -15.062990724255895, -15.062990780717994, -15.062990838609364, -15.06299089796621, -15.06299095882561, -15.062991021225596, -15.06299108520518, -15.062991150804335, -15.062991218064054, -15.06299128702638, -15.0629913577344, -15.062991430232316, -15.062991504565415, -15.062991580780166, -15.062991658924195, -15.062991739046325, -15.062991821196645, -15.062991905426475, -15.06299199178846, -15.06299208033657, -15.06299217112613, -15.06299226421389, -15.06299235965801, -15.062992457518135, -15.062992557855425, -15.062992660732565, -15.06299276621386, -15.062992874365214, -15.06299298525421, -15.062993098950145, -15.06299321552405, -15.062993335048795, -15.06299345759904, -15.062993583251375, -15.062993712084324, -15.06299384417837, -15.06299397961607, -15.062994118482045, -15.062994260863055, -15.06299440684808, -15.062994556528325, -15.062994709997325, -15.06299486735096, -15.062995028687554, -15.062995194107915, 
-15.062995363715386, -15.06299553761595, -15.062995715918255, -15.062995898733705, -15.062996086176515, -15.062996278363794, -15.062996475415625, -15.062996677455105, -15.06299688460847, -15.062997097005136, -15.06299731477779, -15.062997538062495, -15.06299776699873, -15.062998001729525, -15.06299824240151, -15.062998489165045, -15.062998742174274, -15.062999001587245, -15.06299926756602, -15.06299954027674, -15.06299981988975, -15.063000106579725, -15.063000400525736, -15.063000701911385, -15.06300101092493, -15.063001327759386, -15.063001652612645, -15.063001985687604, -15.063002327192295, -15.063002677340014, -15.063003036349444, -15.063003404444805, -15.063003781855976, -15.06300416881866, -15.063004565574515, -15.06300497237132, -15.063005389463106, -15.06300581711032, -15.063006255580015, -15.063006705145986, -15.06300716608895, -15.06300763869672, -15.06300812326438, -15.06300862009449, -15.06300912949724, -15.06300965179067, -15.06301018730086, -15.063010736362125, -15.06301129931724, -15.06301187651763, -15.063012468323615, -15.063013075104605, -15.063013697239365, -15.063014335116206, -15.063014989133285, -15.06301565969878, -15.063016347231216, -15.063017052159665, -15.063017774924054, -15.06301851597543, -15.063019275776204, -15.06302005480051, -15.063020853534406, -15.063021672476275, -15.063022512137096, -15.063023373040645, -15.063024255723985, -15.063025160737885, -15.06302608864682, -15.06302704002952, -15.063028015479455, -15.06302901560502, -15.063030041029956, -15.063031092393715, -15.063032170351924, -15.06303327557677, -15.06303440875742, -15.06303557060033, -15.063036761829846, -15.063037983188595, -15.063039235437945, -15.063040519358426, -15.0630418357503, -15.06304318543396, -15.06304456925049, -15.063045988062195, -15.063047442753184, -15.063048934229744, -15.06305046342109, -15.06305203127973, -15.06305363878234, -15.063055286930075, -15.063056976749374, -15.06305870929254, -15.06306048563833, -15.06306230689274, -15.063064174189485, 
-15.063066088691, -15.06306805158883, -15.063070064104584, -15.063072127490624, -15.063074243030785, -15.06307641204119, -15.063078635871156, -15.063080915903805, -15.06308325355712, -15.06308565028475, -15.063088107576815, -15.063090626960955, -15.063093210003146, -15.063095858308765, -15.063098573523416, -15.06310135733402, -15.063104211470044, -15.06310713770422, -15.0631101378538, -15.06311321378158, -15.063116367397075, -15.06311960065788, -15.06312291557047, -15.0631263141917, -15.06312979862994, -15.06313337104646, -15.06313703365653, -15.06314078873107, -15.06314463859771, -15.06314858564237, -15.063152632310695, -15.063156781109456, -15.063161034608155, -15.06316539544052, -15.063169866306035, -15.06317444997166, -15.0631791492735, -15.06318396711828, -15.063188906485514, -15.063193970428815, -15.06319916207803, -15.06320448464106, -15.06320994140563, -15.06321553574145, -15.063221271102044, -15.06322715102689, -15.063233179143495, -15.06323935916956, -15.0632456949152, -15.063252190285075, -15.063258849280885, -15.0632656760035, -15.06327267465563, -15.06327984954403, -15.06328720508222, -15.063294745792975, -15.06330247631092, -15.06331040138537, -15.063318525883, -15.06332685479063, -15.06333539321825, -15.06334414640183, -15.0633531197064, -15.063362318629185, -15.063371748802714, -15.063381415998006, -15.06339132612795, -15.06340148525065, -15.063411899572856, -15.0634225754535, -15.06343351940727, -15.06344473810835, -15.063456238394105, -15.06346802726893, -15.06348011190823, -15.063492499662345, -15.06350519806071, -15.063518214815934, -15.063531557828176, -15.063545235189405, -15.06355925518788, -15.063573626312655, -15.06358835725822, -15.063603456929226, -15.063618934445255, -15.063634799145785, -15.063651060595134, -15.06366772858759, -15.06368481315261, -15.06370232456011, -15.06372027332587, -15.063738670217026, -15.063757526257694, -15.06377685273465, -15.063796661203165, -15.063816963492895, -15.06383777171393, -15.06385909826285, 
-15.063880955829054, -15.06390335740103, -15.06392631627279, -15.063949846050415, -15.063973960658755, -15.06399867434811, -15.064024001701155, -15.064049957639824, -15.064076557432445, -15.064103816700845, -15.064131751427645, -15.064160377963624, -15.064189713035125, -15.06421977375167, -15.064250577613524, -15.064282142519495, -15.06431448677471, -15.06434762909847, -15.06438158863231, -15.064416384947956, -15.06445203805549, -15.064488568411505, -15.064525996927324, -15.06456434497734, -15.064603634407275, -15.064643887542585, -15.064685127196874, -15.06472737668026, -15.064770659807875, -15.064815000908235, -15.064860424831695, -15.064906956958865, -15.06495462320894, -15.06500345004808, -15.065053464497636, -15.06510469414236, -15.06515716713851, -15.065210912221804, -15.065265958715345, -15.06532233653728, -15.06538007620837, -15.065439208859365, -15.065499766238085, -15.06556178071638, -15.065625285296704, -15.06569031361848, -15.06575689996408, -15.06582507926444, -15.065894887104335, -15.06596635972714, -15.066039534039176, -15.066114447613485, -15.0661911386931, -15.066269646193655, -15.066350009705346, -15.066432269494195, -15.06651646650252, -15.066602642348585, -15.066690839325345, -15.066781100398234, -15.06687346920191, -15.066967990035906, -15.067064707859085, -15.06716366828279, -15.067264917562674, -15.067368502589035, -15.067474470875645, -15.06758287054681, -15.067693750322785, -15.06780715950319, -15.06792314794846, -15.06804176605914, -15.068163064752856, -15.0682870954389, -15.06841390999019, -15.06854356071249, -15.06867610031072, -15.068811581852085, -15.068950058726035, -15.069091584600605, -15.069236213375104, -15.06938399912886, -15.06953499606575, -15.069689258454394, -15.06984684056359, -15.070007796592785, -15.07017218059737, -15.070340046408315, -15.070511447546036, -15.070686437127945, -15.07086506776944, -15.07104739147801, -15.071233459539904, -15.07142332239908, -15.071617029527935, -15.07181462928937, -15.072016168789714, 
-15.072221693721945, -15.07243124819878, -15.072644874574944, -15.072862613258115, -15.073084502507875, -15.073310578222024, -15.073540873709605, -15.073775419449865, -15.07401424283635, -15.074257367905545, -15.074504815048895, -15.07475660070765, -15.07501273704939, -15.07527323162534, -15.0755380870075, -15.075807300404374, -15.076080863254395, -15.076358760795666, -15.07664097161087, -15.076927467146074, -15.07721821120205, -15.077513159396625, -15.077812258596705, -15.078115446318225, -15.078422650092575, -15.078733786797605, -15.07904876195156, -15.079367468967956, -15.07968978836943, -15.080015586958545, -15.080344716943324, -15.08067701501529, -15.081012301377555, -15.081350378720606, -15.08169103114304, -15.0820340230146, -15.08237909777869, -15.08272597669131, -15.083074357493375, -15.08342391301311, -15.08377428969517, -15.08412510605288, -15.084475951039945, -15.08482638233769, -15.0851759245539, -15.08552406732884, -15.08587026334431, -15.08621392623097, -15.086554428369165, -15.086891098578375, -15.087223219689895, -15.08755002599763, -15.08787070058105, -15.08818437249465, -15.08849011381778, -15.08878693655845, -15.089073789404575, -15.089349554315795, -15.08961304294874, -15.0898629929084, -15.090098063817955, -15.09031683319905, -15.09051779215444, -15.09069934084434, -15.09085978374782, -15.090997324700044, -15.09111006169603, -15.09119598145115, -15.09125295370849, -15.091278725282605, -15.09127091382917, -15.091227001329665, -15.091144327279736, -15.091020081569905, -15.090851297046814, -15.090634841742915, -15.090367410762495, -15.090045517811335, -15.089665486357415, -15.089223440409725, -15.088715294902055, -15.088136745668665, -15.08748325899847, -15.086750060754476, -15.08593212504511, -15.08502416243417, -15.084020607676415, -15.082915606965726, -15.081703004683305, -15.080376329633696, -15.0789287807569, -15.077353212305455, -15.07564211847617, -15.073787617487024, -15.071781435090864, -15.069614887518846, -15.0672788638479, 
-15.06476380778826, -15.062059698889184, -15.059156033162944, -15.05604180312999, -15.052705477290916, -15.049134979034164, -15.045317664992124, -15.04124030286232, -15.0368890487151, -15.032249423814365, -15.02730629098359, -15.02204383055588, -15.016445515953656, -15.010494088951654, -15.004171534685335, -14.997459056476265, -14.990337050556395, -14.982785080784575, -14.974781853460705, -14.966305192356556, -14.95733201409652, -14.94783830403723, -14.937799092811565, -14.92718843372054, -14.91597938117539, -14.90414397041209, -14.8916531987218, -14.87847700846244, -14.8645842721392, -14.84994277986531, -14.834519229537445, -14.81827922008419, -14.801187248168805, -14.783206708750136, -14.7642998999265, -14.74442803250681, -14.723551244769615, -14.7016286228846, -14.6786182274799, -14.65447712684373, -14.629161437247065, -14.60262637086635, -14.574826291769824, -14.545714780407016, -14.51524470700833, -14.483368314258925, -14.45003730955879, -14.415202967118326, -14.378816240066115, -14.340827882664225, -14.30118858263592, -14.2598491035141, -14.21676043681653, -14.171873963749245, -14.12514162603489, -14.07651610536036, -14.025951010842645, -13.97340107382482, -13.91882234923985, -13.86217242272103, -13.80341062259597, -13.74249823587895, -13.67939872737373, -13.61407796101599, -13.54650442262039, -13.476649443247736, -13.404487422471664, -13.32999605089452, -13.253156531334195, -13.17395354756859, -13.092374123088065, -13.00840755073221, -12.92204576540505, -12.83328363367472, -12.74211925605057, -12.64855428088005, -12.552594228685185, -12.45424882562652, -12.3535323446426, -12.250463952663695, -12.145068062137145, -12.03737468493033, -11.927419786492605, -11.81524563795994, -11.700901163676004, -11.58444228137718, -11.465932232049886, -11.34544189621202, -11.223050093097266, -11.09884385893059, -10.972918700171425, -10.845378817269335, -10.71633729411995, -10.585916248025885, -10.45424693455549, -10.321469801245815, -10.18773448361568, -10.053199736432925, 
-9.918033292616135, -9.78241164154411, -9.646519717892305, -9.510550491422155, -9.37470444741814, -9.239188946714435, -9.10421745349821, -8.9700086183496, -8.83678520332967, -8.70477283541812, -8.574198574324946, -8.445289280773306, -8.318269771928925, -8.193360751938165, -8.07077650778049, -7.950722364156015, -7.83339189629465, -7.71896390684319, -7.60759678554667, -7.499416628213545, -7.394518445991055, -7.29296415640511, -7.194778469869805, -7.099944904075335, -7.008402093222385, -6.92004060671455, -6.83470054900767, -6.752170280226865, -6.67218667744718, -6.594437450938045, -6.51856614021279, -6.444180543766875, -6.37086548678207, -6.298201006305685, -6.225787237827465, -6.153277526200395, -6.080421564234145, -6.007120692260275, -5.93346074448263, -5.859912989781925, -5.787119961957755, -5.715231187449215, -5.64423543272398, -5.574121603922235, -5.50487874510095, -5.43649603650176, -5.368962792845235, -5.302268461646725, -5.236402621556425, -5.171354980721535, -5.10711537517017, -5.04367376721673, -4.9810202438880085, -4.919145015369835, -4.858038413473764, -4.797690890123314, -4.738093015859701, -4.679235478366386, -4.621109081012463, -4.563704741414295, -4.507013490015346, -4.451026468683844, -4.395734929327942, -4.341130232528324, -4.2872038461878645, -4.233947344197662, -4.181352405121756, -4.129410810834117, -4.078114445413519, -4.02745529369778, -3.977425440089728, -3.928017067321652, -3.879222455233702, -3.8310339795676205, -3.7834441107753265, -3.7364454128424094, -3.6900305421262316, -3.644192246208387, -3.598923362761544, -3.554216818430252, -3.5100656277257274, -3.4664628919343614, -3.4234017980397313, -3.3808756176581, -3.338877705987037, -3.297401500767173, -3.256440521256848, -3.2159883672194374, -3.176038717923341, -3.136585331154319, -3.097622042240145, -3.059142763087381, -3.021141481230061, -2.9836122588902705, -2.9465492320503217, -2.9099466095365023, -2.873798672114212, -2.838099771594278, -2.802844329950462, -2.768026838447843, 
-2.733641856782085, -2.6996840122293926, -2.6661479988069763, -2.6330285764440293, -2.6003205701629115, -2.568018869270578, -2.5361184265600274, -2.5046142575216415, -2.4735014395643793, -2.4427751112465845, -2.4124304715163887, -2.382462778961558, -2.3528673510685936, -2.323639563491135, -2.294774849327356, -2.2662686984063947, -2.238116656583642, -2.210314325044749, -2.1828573596183327, -2.155741470097168, -2.128962419567846, -2.1025160237487563, -2.0763981503362654, -2.050604718359062, -2.025131697540475, -1.9999751076687455, -1.97513101797512, -1.95059554651964, -1.926364859584605, -1.902435171075527, -1.878802741929551, -1.8554638795312375, -1.83241493713555, -1.809652313298079, -1.787172451312283, -1.764971838653754, -1.743047006431392, -1.721394528845357, -1.7000110226518095, -1.6788931466342465, -1.65803760108144, -1.637441127271863, -1.617100506964479, -1.597012561895913, -1.5771741532838186, -1.557582181336449, -1.5382335847683104, -1.5191253403218146, -1.500254462294907, -1.481618002074527, -1.463213047675888, -1.445036723287483, -1.4270861888217086, -1.409358639471122, -1.391851305270164, -1.3745614506623505, -1.3574863740728464, -1.3406234074863186, -1.323969916030074, -1.307523297562337, -1.291280982265665, -1.2752404322454196, -1.2593991411331924, -1.243754633695204, -1.2283044654455224, -1.2130462222641165, -1.1979775200196485, -1.183096004196935, -1.1683993495290645, -1.1538852596340534, -1.13955146665604, -1.1253957309109315, -1.1114158405364345, -1.0976096111464615, -1.083974885489803, -1.0705095331130554, -1.057211450027744, -1.044078558381551, -1.0311088061336635, -1.0183001667341256, -1.0056506388071875, -0.9931582458385955, -0.980821035866742, -0.968637081177682, -0.9566044780039085, -0.944721346226894, -0.93298582908332, -0.9213960928749425, -0.909950326682085, -0.898646742080666, -0.887483572862761, -0.8764590747606315, -0.865571525174168, -0.8548192229017435, -0.844200487874382, -0.833713660893256, -0.8233571033704306, -0.8131291970728225, 
-0.8030283438693615, -0.7930529654812575, -0.783201503235448, -0.7734724178208735, -0.763864187396877, -0.754375314107159, -0.7450043134874546, -0.73574972130056, -0.726610091498291, -0.717583995995544, -0.708670024447093, -0.699866782342165, -0.6911728976728935, -0.682587010161207, -0.674107778245767, -0.6657338770306505, -0.6574640005018665, -0.6492968513856066, -0.6412311562342415, -0.6332656547658765, -0.625399102354109, -0.6176302686042175, -0.6099579421868365, -0.602380922938875, -0.594898026935659, -0.587508084959467, -0.58020994231683, -0.573002458658111, -0.565884507799328, -0.558854977546176, -0.5519127695202545, -0.5450567989874345, -0.538285994688372, -0.531599298671122, -0.5249956661258265, -0.518474065222647, -0.5120334769457034, -0.505672894941272, -0.49939132535547587, -0.49318778668028485, -0.48706130960016175, -0.48101093684059804, -0.4750357230185388, -0.46913473449467, -0.46330704922752686, -0.45755175662943215, -0.45186795742420705, -0.44625476350666093, -0.440711297803826, -0.435236694137906, -0.42983009709094205, -0.4244906618711447, -0.4192175541808967, -0.4140099500863951, -0.40886703588890355, -0.40378800799761666, -0.39877207280409277, -0.3938184465582516, -0.3889263552459158, -0.38409503446786325, -0.37932372932039443, -0.3746116942773721, -0.36995819307373334, -0.3653624985904487, -0.36082389274090393, -0.35634166635870257, -0.3519151190868526, -0.3475435592683361, -0.34322630383803865, -0.33896267821601456, -0.3347520162020875, -0.33059365987175016, -0.32648695947336515, -0.3224312733266413, -0.31842596772236575, -0.31447041682339005, -0.3105640025668381, -0.3067061145675339, -0.3028961500226299, -0.29913351361741336, -0.29541761743229206, -0.2917478808509263, -0.2881237304695086, -0.2845446000071696, -0.2810099302174917, -0.27751916880113, -0.27407177031951047, -0.2706671961096063, -0.2673049141997713, -0.2639843992266157, -0.2607051323529202, -0.25746660118656356, -0.25426829970046166, -0.2511097281535013, -0.2479903930124512, 
-0.2449098068748501, -0.24186748839284605, -0.2388629621979861, -0.23589575882694055, -0.2329654146481449, -0.2300714717893603, -0.2272134780661268, -0.2243909869111101, -0.2216035573043258, -0.21885075370422566, -0.21613214597964764, -0.2134473093426032, -0.2107958242819051, -0.20817727649761886, -0.2055912568363249, -0.20303736122719065, -0.2005151906188308, -0.1980243509169561, -0.1955644529227963, -0.1931351122722842, -0.1907359493760015, -0.1883665893598642, -0.1860266620065487, -0.18371580169764626, -0.18143364735653145, -0.17917984239194604, -0.17695403464227866, -0.17475587632053946, -0.1725850239600194, -0.1704411383606196, -0.1683238845358542, -0.16623293166050515, -0.16416795301893125, -0.16212862595401914, -0.16011463181676536, -0.1581256559164889, -0.1561613874716586, -0.1542215195613332, -0.15230574907720576, -0.15041377667623976, -0.14854530673389885, -0.14670004729795294, -0.1448777100428606, -0.1430780102247188, -0.1413006666367687, -0.13954540156545936, -0.13781194074705194, -0.13610001332476654, -0.1344093518064611, -0.13273969202283306, -0.1310907730861448, -0.12946233734945756, -0.127854130366374, -0.1262659008512818, -0.12469740064008725, -0.12314838465144184, -0.12161861084844555, -0.12010784020082906, -0.11861583664760544, -0.11714236706018365, -0.1156872012059432, -0.1142501117122586, -0.1128308740309725, -0.1114292664033101, -0.11004506982522765, -0.108678068013194, -0.1073280473703947, -0.1059947969533574, -0.1046781084389924, -0.10337777609204005, -0.1020935967329259, -0.1008253697060124, -0.09957289684824605, -0.0983359824581951, -0.0971144332654691, -0.0959080584005219, -0.09471666936482626, -0.0935400800014212, -0.09237810646582535, -0.0912305671973093, -0.09009728289052805, -0.0889780764675028, -0.0878727730499527, -0.0867811999319704, -0.085703186553035, -0.0846385644713632, -0.0835871673375885, -0.08254883086876955, -0.081523392822721, -0.08051069297266135, -0.07951057308217915, -0.0785228768805066, -0.07754745003810246, 
-0.0765841401425384, -0.07563279667468314, -0.07469327098518465, -0.07376541627124215, -0.0728490875536688, -0.0719441416542385, -0.071050437173313, -0.0701678344677492 + ], + "atomic_wave_functions": [ + { + "occupation": 2.0, + "radial_function": [ + -0.0002330888701227573, -0.0002360207671226897, -0.0002389895428447579, -0.0002419956611660974, -0.0002450395917986962, -0.0002481218103627887, -0.0002512427984611712, -0.0002544030437544549, -0.0002576030400372615, -0.0002608432873153816, -0.0002641242918838999, -0.0002674465664063053, -0.0002708106299945948, -0.0002742170082903857, -0.0002776662335470497, -0.0002811588447128757, -0.0002846953875152825, -0.0002882764145460901, -0.000291902485347862, -0.0002955741665013357, -0.0002992920317139503, -0.0003030566619094908, -0.0003068686453188574, -0.0003107285775719774, -0.0003146370617908742, -0.000318594708683904, -0.0003226021366411821, -0.0003266599718312062, -0.0003307688482986958, -0.0003349294080636635, -0.0003391423012217305, -0.0003434081860457058, -0.0003477277290884415, -0.000352101605286983, -0.0003565304980680291, -0.0003610150994547177, -0.0003655561101747556, -0.0003701542397699076, -0.0003748102067068639, -0.0003795247384895019, -0.0003842985717725585, -0.0003891324524767331, -0.0003940271359052395, -0.0003989833868618224, -0.0004040019797702597, -0.0004090836987953651, -0.0004142293379655167, -0.0004194397012967237, -0.0004247156029182549, -0.0004300578671998475, -0.0004354673288805158, -0.000440944833198979, -0.0004464912360257309, -0.000452107403996771, -0.0004577942146490176, -0.000463552556557422, -0.0004693833294738098, -0.0004752874444674683, -0.0004812658240675005, -0.000487319402406974, -0.0004934491253688746, -0.0004996559507339064, -0.0005059408483301425, -0.0005123048001845609, -0.000518748800676491, -0.0005252738566929804, -0.0005318809877861247, -0.0005385712263323716, -0.0005453456176938303, -0.0005522052203816102, -0.0005591511062212138, -0.0005661843605200082, -0.000573306082236806, 
-0.0005805173841535761, -0.0005878193930493197, -0.0005952132498761248, -0.0006027001099374426, -0.0006102811430686036, -0.0006179575338196027, -0.0006257304816401883, -0.0006336012010672716, -0.0006415709219147006, -0.0006496408894654175, -0.0006578123646660327, -0.0006660866243238502, -0.0006744649613063628, -0.0006829486847432667, -0.0006915391202310098, -0.0007002376100399143, -0.0007090455133239104, -0.0007179642063328969, -0.0007269950826277813, -0.000736139553298223, -0.0007453990471831135, -0.0007547750110938323, -0.0007642689100403068, -0.0007738822274599202, -0.0007836164654492956, -0.0007934731449989966, -0.0008034538062311812, -0.0008135600086402391, -0.0008237933313364646, -0.0008341553732927871, -0.0008446477535946069, -0.0008552721116927776, -0.0008660301076597615, -0.0008769234224490156, -0.0008879537581576354, -0.0008991228382923032, -0.0009104324080385846, -0.0009218842345336023, -0.000933480107142152, -0.0009452218377362844, -0.0009571112609784005, -0.0009691502346079187, -0.000981340639731533, -0.0009936843811171345, -0.001006183387491422, -0.001018839611841257, -0.001031655031718816, -0.001044631649550567, -0.001057771492950146, -0.001071076615035158, -0.001084549094747974, -0.001098191037180549, -0.001112004573903335, -0.001125991863298326, -0.0011401550908963, -0.00115449646971829, -0.001169018240621359, -0.001183722672648718, -0.001198612063384256, -0.001213688739311514, -0.001228955056177191, -0.001244413399359213, -0.001260066184239426, -0.001275915856580989, -0.001291964892910501, -0.001308215800904944, -0.001324671119783486, -0.001341333420704213, -0.00135820530716585, -0.001375289415414539, -0.001392588414855724, -0.001410105008471226, -0.001427841933241557, -0.001445801960573544, -0.001463987896733333, -0.001482402583284847, -0.001501048897533745, -0.001519929752976966, -0.001539048099757934, -0.001558406925127483, -0.001578009253910578, -0.001597858148978911, -0.001617956711729429, -0.001638308082568891, -0.001658915441404513, 
-0.001679782008140787, -0.001700911043182541, -0.001722305847944335, -0.001743969765366251, -0.001765906180436179, -0.001788118520718668, -0.001810610256890427, -0.001833384903282556, -0.001856446018429601, -0.001879797205625518, -0.001903442113486608, -0.001927384436521562, -0.001951627915708634, -0.001976176339080119, -0.002001033542314134, -0.002026203409333874, -0.0020516898729144, -0.002077496915297012, -0.002103628568811435, -0.002130088916505729, -0.002156882092784189, -0.002184012284053247, -0.002211483729375473, -0.002239300721131838, -0.002267467605692266, -0.002295988784094647, -0.002324868712732373, -0.00235411190405051, -0.002383722927250745, -0.00241370640900517, -0.00244406703417906, -0.002474809546562745, -0.002505938749612639, -0.002537459507201649, -0.00256937674437895, -0.002601695448139381, -0.002634420668202462, -0.002667557517801203, -0.002701111174480892, -0.002735086880907831, -0.002769489945688316, -0.002804325744197881, -0.002839599719420949, -0.002875317382801093, -0.002911484315101904, -0.002948106167278742, -0.002985188661361448, -0.003022737591348081, -0.003060758824109991, -0.003099258300308163, -0.003138242035321158, -0.0031777161201847, -0.003217686722543004, -0.003258160087612178, -0.003299142539155617, -0.003340640480471741, -0.003382660395394099, -0.003425208849304045, -0.003468292490156168, -0.003511918049516559, -0.003556092343614177, -0.003600822274405408, -0.003646114830651956, -0.003691977089012362, -0.003738416215147153, -0.003785439464837932, -0.003833054185120524, -0.003881267815432283, -0.003930087888773944, -0.003979522032885956, -0.004029577971439641, -0.004080263525243349, -0.004131586613463654, -0.004183555254862028, -0.004236177569046935, -0.004289461777741707, -0.004343416206068361, -0.004398049283847447, -0.004453369546914355, -0.004509385638452037, -0.004566106310340517, -0.004623540424523351, -0.004681696954391202, -0.004740584986182852, -0.00480021372040375, -0.004860592473262381, -0.004921730678124677, 
-0.004983637886986689, -0.005046323771965741, -0.00510979812681029, -0.005174070868428737, -0.005239152038437445, -0.005305051804728163, -0.005371780463055141, -0.005439348438642123, -0.005507766287809567, -0.005577044699622225, -0.005647194497557416, -0.005718226641194233, -0.00579015222792391, -0.005862982494681692, -0.0059367288197004, -0.006011402724285967, -0.00608701587461529, -0.006163580083556571, -0.006241107312512546, -0.006319609673286776, -0.006399099429973352, -0.006479589000870292, -0.006561090960416919, -0.006643618041155513, -0.006727183135717543, -0.00681179929883483, -0.00689747974937587, -0.00698423787240767, -0.007072087221283449, -0.007161041519756455, -0.007251114664120276, -0.007342320725375944, -0.007434673951426188, -0.007528188769297128, -0.007622879787387854, -0.007718761797748092, -0.007815849778384436, -0.007914158895595363, -0.008013704506335576, -0.008114502160609861, -0.008216567603896874, -0.00831991677960328, -0.00842456583154858, -0.008530531106481004, -0.008637829156624912, -0.008746476742259913, -0.008856490834332336, -0.008967888617099292, -0.009080687490805745, -0.009194905074395144, -0.00931055920825373, -0.009427667956989256, -0.009546249612244366, -0.009666322695545052, -0.009787905961184753, -0.009911018399144324, -0.01003567923804854, -0.0101619079481594, -0.01028972424440682, -0.01041914808945716, -0.01055019969681974, -0.01068289953399248, -0.01081726832564635, -0.01095332705684971, -0.0110910969763328, -0.01123059959979269, -0.01137185671323963, -0.01151489037638485, -0.0116597229260705, -0.01180637697974241, -0.01195487543896584, -0.01210524149298508, -0.01225749862232711, -0.01241167060245026, -0.01256778150743793, -0.01272585571373835, -0.0128859179039506, -0.0130479930706577, -0.01321210652030721, -0.01337828387713993, -0.01354655108716725, -0.0137169344221978, -0.01388946048391397, -0.01406415620799882, -0.01424104886831412, -0.01442016608113003, -0.01460153580940703, -0.01478518636713077, -0.01497114642370053, 
-0.01515944500837174, -0.0153501115147534, -0.01554317570536084, -0.01573866771622472, -0.01593661806155674, -0.01613705763847283, -0.01634001773177453, -0.0165455300187889, -0.01675362657426828, -0.01696433987534994, -0.01717770280657683, -0.01739374866497979, -0.01761251116522218, -0.01783402444480742, -0.01805832306935038, -0.01828544203791322, -0.01851541678840667, -0.01874828320305683, -0.01898407761393922, -0.01922283680858014, -0.01946459803562619, -0.01970939901058305, -0.0199572779216239, -0.02020827343546863, -0.02046242470333434, -0.02071977136695822, -0.02098035356469326, -0.02124421193767792, -0.02151138763608038, -0.0217819223254185, -0.02205585819295584, -0.02233323795417496, -0.02261410485932865, -0.02289850270007019, -0.02318647581616317, -0.02347806910227187, -0.02377332801483331, -0.02407229857901119, -0.02437502739573354, -0.02468156164881405, -0.02499194911215853, -0.02530623815705726, -0.02562447775956394, -0.02594671750796243, -0.02627300761032209, -0.02660339890214243, -0.02693794285408838, -0.02727669157981657, -0.027619697843894, -0.02796701506980988, -0.02831869734808152, -0.0286747994444551, -0.02903537680820268, -0.02940048558051542, -0.02977018260299541, -0.03014452542624562, -0.0305235723185597, -0.0309073822747126, -0.03129601502485214, -0.03168953104349358, -0.03208799155861729, -0.0324914585608708, -0.03289999481287616, -0.03331366385864338, -0.03373253003309081, -0.03415665847167374, -0.03458611512012182, -0.03502096674428644, -0.03546128094009833, -0.03590712614363734, -0.0363585716413143, -0.03681568758016634, -0.03727854497826667, -0.03774721573524891, -0.03822177264294774, -0.03870228939615628, -0.03918884060350077, -0.03968150179843392, -0.04018034945034703, -0.04068546097580256, -0.04119691474988678, -0.04171479011768409, -0.04223916740587369, -0.04277012793444846, -0.04330775402855799, -0.04385212903047517, -0.04440333731168772, -0.04496146428511513, -0.04552659641745115, -0.04609882124163296, -0.04667822736943704, 
-0.04726490450420247, -0.04785894345368215, -0.04846043614302158, -0.04906947562786654, -0.04968615610759954, -0.05031057293870519, -0.05094282264826453, -0.05158300294757882, -0.05223121274592328, -0.05288755216442926, -0.05355212255009646, -0.05422502648993491, -0.05490636782523506, -0.05559625166596761, -0.05629478440531175, -0.05700207373431163, -0.05771822865666084, -0.05844335950361362, -0.05917757794902351, -0.0599209970245073, -0.06067373113473408, -0.06143589607283912, -0.06220760903595996, -0.06298898864089517, -0.06378015493988365, -0.0645812294365035, -0.06539233510168856, -0.06621359638986075, -0.06704513925517823, -0.06788709116789454, -0.06873958113082913, -0.06960273969594653, -0.07047669898104, -0.07136159268652001, -0.07225755611230206, -0.07316472617479321, -0.0740832414239723, -0.0750132420605616, -0.07595486995328667, -0.07690826865621937, -0.07787358342620113, -0.07885096124034191, -0.07984055081359027, -0.08084250261637001, -0.08185696889227813, -0.0828841036758394, -0.0839240628103107, -0.08497700396553086, -0.0860430866558087, -0.0871224722578427, -0.08821532402866618, -0.08932180712361001, -0.09044208861427526, -0.09157633750650901, -0.09272472475837246, -0.09388742329809498, -0.09506460804200408, -0.09625645591241945, -0.09746314585550485, -0.09868485885906379, -0.09992177797026885, -0.1011740883133141, -0.102441977106976, -0.1037256336820716, -0.1050252494988006, -0.1063410181639559, -0.1076731354479896, -0.1090217993019169, -0.1103872098740439, -0.1117695695265022, -0.1131690828515695, -0.1145859566877638, -0.1160204001356865, -0.1174726245735967, -0.1189428436726968, -0.1204312734121048, -0.1219381320934938, -0.1234636403553723, -0.125008021186982, -0.1265714999417871, -0.1281543043505278, -0.1297566645338112, -0.1313788130142075, -0.1330209847278255, -0.1346834170353314, -0.1363663497323809, -0.1380700250594297, -0.1397946877108841, -0.1415405848435587, -0.1433079660843996, -0.1450970835374329, -0.1469081917898983, 
-0.1487415479175221, -0.1505974114888861, -0.1524760445688437, -0.1543777117209355, -0.1563026800087524, -0.1582512189961931, -0.1602236007465626, -0.1622200998204521, -0.164240993272342, -0.1662865606458676, -0.1683570839676798, -0.1704528477398383, -0.1725741389306643, -0.1747212469639844, -0.1768944637066878, -0.1790940834545213, -0.1813204029160412, -0.1835737211946392, -0.1858543397685549, -0.1881625624687858, -0.1904986954548004, -0.1928630471879605, -0.1952559284025497, -0.1976776520743045, -0.2001285333863434, -0.2026088896923784, -0.2051190404770954, -0.2076593073135833, -0.2102300138176866, -0.2128314855991554, -0.2154640502094564, -0.2181280370861075, -0.2208237774933958, -0.2235516044593276, -0.2263118527086588, -0.2291048585918463, -0.2319309600097582, -0.2347904963339706, -0.2376838083224767, -0.2406112380306266, -0.2435731287171092, -0.2465698247447822, -0.2496016714761512, -0.2526690151632858, -0.2557722028319661, -0.2589115821598273, -0.2620875013482845, -0.2653003089879943, -0.2685503539176111, -0.2718379850755886, -0.2751635513447623, -0.2785274013894471, -0.281929883484776, -0.2853713453379907, -0.2888521339013905, -0.2923725951766379, -0.29593307401011, -0.299533913878969, -0.3031754566676275, -0.306858042434258, -0.310582009167006, -0.3143476925295393, -0.3181554255955628, -0.3220055385719167, -0.3258983585098685, -0.3298342090041903, -0.3338134098796151, -0.3378362768642351, -0.3419031212494197, -0.3460142495357935, -0.3501699630648219, -0.3543705576355364, -0.3586163231059083, -0.3629075429783897, -0.3672444939691109, -0.3716274455602149, -0.376056659534815, -0.380532389494022, -0.3850548803555021, -0.3896243678330046, -0.3942410778962799, -0.3989052262108193, -0.4036170175568138, -0.4083766452267367, -0.4131842904009446, -0.4180401215006583, -0.4229442935177186, -0.4278969473204668, -0.4328982089351089, -0.4379481888019223, -0.4430469810056325, -0.4481946624793199, -0.4533912921811851, -0.4586369102434993, -0.4639315370930876, 
-0.4692751725426618, -0.4746677948523526, -0.4801093597607691, -0.4855997994849308, -0.4911390216884222, -0.4967269084171222, -0.5023633150018941, -0.5080480689275944, -0.5137809686678207, -0.519561782484811, -0.5253902471939234, -0.5312660668921766, -0.5371889116503394, -0.5431584161680916, -0.5491741783918251, -0.5552357580946862, -0.5613426754185068, -0.5674944093773333, -0.5736903963222949, -0.5799300283676403, -0.5862126517778136, -0.5925375653155424, -0.5989040185509602, -0.6053112101318984, -0.6117582860155621, -0.6182443376619063, -0.6247684001891632, -0.6313294504920565, -0.6379264053233922, -0.64455811933986, -0.6512233831129874, -0.6579209211064269, -0.664649389620842, -0.6714073747078989, -0.6781933900550701, -0.6850058748431177, -0.6918431915784158, -0.6987036239024534, -0.7055853743811535, -0.7124865622768987, -0.7194052213064286, -0.7263392973881206, -0.7332866463824385, -0.7402450318296981, -0.747212122689685, -0.7541854910879746, -0.7611626100742752, -0.7681408513984899, -0.7751174833106432, -0.7820896683913185, -0.7890544614196603, -0.79600880728661, -0.802949538961474, -0.8098733755205405, -0.8167769202470285, -0.8236566588122307, -0.830508957548363, -0.8373300618242508, -0.8441160945356949, -0.8508630547229938, -0.8575668163288444, -0.8642231271105784, -0.8708276077214119, -0.8773757509761614, -0.8838629213176754, -0.8902843545009844, -0.8966351575130141, -0.902910308746483, -0.9091046584474145, -0.9152129294565358, -0.9212297182655884, -0.9271494964104097, -0.9329666122233855, -0.9386752929686601, -0.9442696473841873, -0.9497436686553827, -0.9550912378458122, -0.9603061278108935, -0.9653820076211097, -0.9703124475216861, -0.9750909244560081, -0.9797108281802951, -0.9841654679971805, -0.9884480801358029, -0.9925518358058715, -0.9964698499527951, -1.00019519074045, -1.003720889787398, -1.007039953181363, -1.010145373295559, -1.01303014142888, -1.015687261290106, -1.018109763344106, -1.020290720035408, -1.022223261901504, -1.023900594584839, 
-1.025316016748514, -1.026462938896279, -1.027334903092432, -1.02792560357163, -1.028228908222437, -1.028238880921504, -1.027949804687731, -1.027356205617362, -1.026452877551879, -1.025234907420536, -1.023697701188576, -1.021837010330437, -1.019648958734581, -1.017130069932987, -1.014277294533735, -1.011088037719534, -1.007560186658498, -1.003692137655844, -0.9994828228566685, -0.9949317362904293, -0.9900389590273078, -0.9848051831953187, -0.9792317345849327, -0.9733205935451102, -0.9670744138513279, -0.960496539202197, -0.9535910169772079, -0.9463626088638525, -0.9388167979382808, -0.9309597917600017, -0.9227985210181181, -0.9143406332447976, -0.9055944810912171, -0.8965691046427797, -0.887274207234426, -0.877720124213778, -0.8679177840905629, -0.8578786615056163, -0.8476147214528114, -0.8371383541931351, -0.8264623003127564, -0.815599565397314, -0.8045633238237186, -0.7933668112097271, -0.7820232051111874, -0.7705454936189797, -0.7589463315829783, -0.747237884280255, -0.7354316584505962, -0.7235383207455248, -0.7115675037784561, -0.6995276001249626, -0.6874255448042093, -0.6752665869767143, -0.6630540518206687, -0.6507907976472342, -0.6384849062336991, -0.6261458814089714, -0.6137831305499066, -0.6014059544730429, -0.5890235375753159, -0.5766449382337145, -0.5642790794733332, -0.5519347399127977, -0.5396205449955938, -0.5273449579992058, -0.5151162677385938, -0.5029425903969875, -0.4908318574787936, -0.4787918080209436, -0.4668299816951487, -0.4549537122445607, -0.4431701212607933, -0.4314861123066161, -0.4199083653893608, -0.4084433317898326, -0.3970972292512219, -0.3858760375322576, -0.3747854943285564, -0.3638310915658465, -0.3530180720684424, -0.3423514266060474, -0.3318358913216467, -0.321475945542899, -0.3112758099790962, -0.3012394453053668, -0.2913705511353928, -0.2816725653834874, -0.2721486640164089, -0.2628017611948009, -0.2536345098036328, -0.2446493023704486, -0.235848272369666, -0.22723329591054, -0.2188059938057661, -0.2105677340170219, 
-0.2025196344730422, -0.1946625662550933, -0.1869971571439535, -0.1795237955217359, -0.1722426346210897, -0.1651535971134994, -0.1582563800275858, -0.1515504599874691, -0.1450350987604214, -0.1387093491021995, -0.1325720608876052, -0.1266218875130085, -0.120857292556752, -0.115276556682562, -0.1098777847703367, -0.1046589132579361, -0.09961771767690736, -0.09475182036441349, -0.09005869833302851, -0.08553569127949534, -0.08118000971303851, -0.07698874318338225, -0.07295886858824013, -0.0690872585397353, -0.06537068976897267, -0.06180585154781909, -0.05838935410687062, -0.05511773702857976, -0.05198747759460194, -0.04899499906658582, -0.04613667887988481, -0.04340885673000882, -0.04080784253205785, -0.03832992423389053, -0.03597137546437531, -0.03372846299874524, -0.03159745402383487, -0.02957462318680533, -0.02765625941186783, -0.02583867247048589, -0.02411819929156905, -0.02249120999926292, -0.02095411366708257, -0.01950336377832556, -0.01813546338392882, -0.01684696995019392, -0.01563449989009188, -0.01449473277316165, -0.01342441521033148, -0.01242036441130995, -0.0114794714135053, -0.01059870398273352, -0.009775109187254943, -0.009005815647933955, -0.008288035468534974, -0.007619065851346034, -0.006996290404452296, -0.006417180148057377, -0.005879294228267834, -0.005380280347707636, -0.004917874923211411, -0.004489902981653466, -0.004094277805699834, -0.003729000341920497, -0.003392158384265723, -0.00308192554639244, -0.002796560036722847, -0.002534403250427321, -0.002293878192747673, -0.002073487748215506, -0.001871812810375508, -0.001687510286597005, -0.001519310992451614, -0.001366017449953734, -0.001226501603707367, -0.001099702468681702, -0.0009846237229535217, -0.0008803312583116458, -0.0007859507011226996, -0.0007006649153137556, -0.0006237114987416236, -0.0005543802835965591, -0.0004920108508356713, -0.0004359900679643853, -0.0003857496587886829, -0.0003407638130523236, -0.0003005468431574838, -0.0002646508944496345, -0.0002326637148332621, 
-0.0002042064887791528, -0.0001789317400910167, -0.0001565213071235686, -0.0001366843934896897, -0.0001191556966645538, -0.0001036936162927397, -9.007854343309891e-05, -7.811123143783789e-05, -6.761124865877212e-05, -5.84155127065202e-05, -5.03769055585781e-05, -4.336296842042739e-05, -3.725467489038164e-05, -3.194528066368287e-05, -2.733924773402899e-05, -2.335124081051658e-05, -1.990519346391604e-05, -1.693344134700032e-05, -1.437591969782925e-05, -1.217942223076945e-05, -1.029691844579386e-05, -8.686926340378871e-06, -7.312937486195353e-06, -6.14289143921189e-06, -5.148696472680103e-06, -4.305793667863178e-06, -3.592761457775814e-06, -2.990957794648189e-06, -2.48419720006513e-06, -2.058460055013328e-06, -1.701631595751863e-06, -1.403268194401174e-06, -1.15439128422116e-06, -9.473224127708278e-07, -7.754658048953519e-07, -6.33189891307352e-07, -5.157010553487636e-07, -4.18929424699358e-07, -3.394285780843629e-07, -2.74287715668469e-07, -2.210549595711591e-07, -1.776705632466286e-07, -1.424089155548818e-07, -1.138283265932583e-07, -9.072767768680789e-08, -7.210910724901706e-08, -5.714598749840338e-08, -4.515552429677543e-08, -3.557538376445189e-08, -2.794381497890091e-08, -2.188279817002021e-08, -1.708380261548498e-08, -1.329578816783972e-08, -1.031512928598562e-08, -7.977180886066188e-09, -6.14924156762102e-09, -4.724702110872435e-09, -3.618195880020885e-09, -2.761593200848227e-09, -2.100704196619904e-09, -1.592574242051564e-09, -1.203273396826655e-09, -9.060961603851187e-10, -6.801008869342423e-10, -5.089294380594553e-10, -3.798573402562089e-10, -2.830330537242312e-10, -2.108721336396873e-10, -1.565164679620368e-10, -1.157281589376049e-10, -8.523840145909926e-11, -6.25356431351769e-11, -4.56977245386155e-11, -3.325940305445241e-11, -2.410822638199895e-11, -1.740302514125991e-11, -1.251037621192407e-11, -8.955282791079742e-12, -6.383052859434031e-12, -4.529960111331856e-12, -3.200762276995537e-12, -2.251549887162308e-12, -1.57671988518344e-12, 
-1.099126634692554e-12, -7.626692311798814e-13, -5.267381847489812e-13, -3.620744859627849e-13, -2.476964169841692e-13, -1.686296819668283e-13, -1.142389746385809e-13, -7.700762001746512e-14, -5.164937241164712e-14, -3.446520053157127e-14, -2.287984663419027e-14, -1.510962622085413e-14, -9.925538195863312e-15, -6.485227280842956e-15, -4.214420102858146e-15, -2.723718773551784e-15, -1.750525628347122e-15, -1.118731917520956e-15, -7.108928138314068e-16, -4.491294838844388e-16, -2.820958963645554e-16, -1.761358455574136e-16, -1.093181115900379e-16, -6.743682154418829e-17, -4.134561528412292e-17, -2.519159106057917e-17, -1.525251506611588e-17, -9.175982580567062e-18, -5.484714727715528e-18, -3.256945615083029e-18, -1.921260164759644e-18, -1.125757418369455e-18, -6.551635506074926e-19, -3.786724862601333e-19, -2.173448038106188e-19, -1.238705770940811e-19, -7.009416568198374e-20, -3.937778906207506e-20, -2.196023477695678e-20, -1.215622614489831e-20, -6.678764781039878e-21, -3.641564829324613e-21, -1.970302377250215e-21, -1.057763433175872e-21, -5.633942566849214e-22, -2.976879863731721e-22, -1.560240383504226e-22, -8.110708067703003e-23, -4.181366681046539e-23, -2.137590857706324e-23, -1.083507201578443e-23, -5.444943503481635e-24, -2.712452815423701e-24, -1.339339058353563e-24, -6.554357002430042e-25, -3.178571241590443e-25, -1.527378141411491e-25, -7.271496506906063e-26, -3.429358187451915e-26, -1.601999183009367e-26, -7.411737884050772e-27, -3.395731987200394e-27, -1.54045479863636e-27, -6.918511232287631e-28, -3.075882459726842e-28, -1.35352033020175e-28, -5.894427971824025e-29, -2.5400574888427e-29, -1.082961148928991e-29, -4.567634908781971e-30, -1.905548733373567e-30, -7.862110764676292e-31, -3.20765888331398e-31, -1.293915325315977e-31, -5.159767883246701e-32, -2.033753391173977e-32, -7.922215108481439e-33, -3.049374639813418e-33, -1.159646280761259e-33, -4.356371059734073e-34, -1.616374148869487e-34, -5.922552663093603e-35, -2.142680486273537e-35, 
-7.652757091791546e-36, -2.697864805819701e-36, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "occupation": 5.0, + "radial_function": [ + -2.726779581573615e-08, -2.795808334432166e-08, -2.866584558443219e-08, -2.939152491047787e-08, -3.013557489562409e-08, -3.08984605952942e-08, -3.168065883784432e-08, -3.248265852259653e-08, -3.330496092541663e-08, -3.414808001202796e-08, -3.501254275925636e-08, -3.589888948440784e-08, -3.680767418298438e-08, -3.773946487494899e-08, -3.869484395975653e-08, -3.967440858037218e-08, -4.067877099650499e-08, -4.170855896729009e-08, -4.27644161436583e-08, -4.384700247063884e-08, -4.495699459984615e-08, -4.609508631240915e-08, -4.726198895260661e-08, -4.845843187248034e-08, -4.968516288770377e-08, -5.094294874499079e-08, -5.223257560133713e-08, -5.355484951539384e-08, -5.491059695127985e-08, -5.63006652951488e-08, -5.772592338483234e-08, -5.91872620528921e-08, -6.06855946834184e-08, -6.222185778292471e-08, -6.379701156569446e-08, -6.54120405539457e-08, -6.706795419318916e-08, -6.876578748316405e-08, -7.05066016247461e-08, -7.229148468323228e-08, -7.412155226841653e-08, -7.599794823188128e-08, -7.79218453819417e-08, -7.989444621668841e-08, -8.191698367558725e-08, -8.399072191010583e-08, -8.611695707384868e-08, -8.829701813269476e-08, -9.053226769544359e-08, -9.282410286548934e-08, -9.517395611405481e-08, -9.758329617553194e-08, -1.000536289654878e-07, -1.025864985219092e-07, -1.051834879702763e-07, -1.078462205130662e-07, -1.10576360444306e-07, -1.133756141898097e-07, -1.162457313737489e-07, -1.191885059122228e-07, -1.222057771345129e-07, -1.252994309327227e-07, -1.284714009405197e-07, -1.317236697417176e-07, -1.350582701094548e-07, -1.384772862767404e-07, -1.41982855239167e-07, -1.455771680906003e-07, 
-1.492624713926814e-07, -1.530410685789997e-07, -1.569153213948104e-07, -1.608876513732004e-07, -1.649605413486224e-07, -1.691365370087447e-07, -1.73418248485586e-07, -1.778083519869297e-07, -1.823095914690378e-07, -1.869247803517095e-07, -1.916568032767566e-07, -1.965086179109955e-07, -2.014832567948798e-07, -2.065838292379341e-07, -2.118135232621689e-07, -2.171756075946936e-07, -2.22673433710773e-07, -2.283104379286031e-07, -2.340901435571163e-07, -2.400161630981589e-07, -2.46092200504417e-07, -2.523220534945012e-07, -2.587096159266376e-07, -2.65258880232449e-07, -2.719739399123482e-07, -2.788589920941014e-07, -2.859183401561617e-07, -2.931563964174132e-07, -3.00577684895004e-07, -3.081868441319956e-07, -3.159886300965943e-07, -3.239879191547768e-07, -3.321897111181642e-07, -3.405991323690592e-07, -3.492214390645871e-07, -3.580620204219563e-07, -3.671264020868789e-07, -3.76420249587267e-07, -3.859493718743566e-07, -3.957197249534779e-07, -4.057374156067351e-07, -4.160087052099291e-07, -4.265400136461022e-07, -4.373379233181565e-07, -4.484091832630507e-07, -4.597607133701469e-07, -4.713996087063468e-07, -4.833331439507159e-07, -4.955687779413728e-07, -5.081141583374792e-07, -5.209771263992504e-07, -5.341657218889726e-07, -5.476881880960844e-07, -5.615529769894729e-07, -5.757687545001974e-07, -5.903444059379457e-07, -6.052890415446048e-07, -6.206120021884237e-07, -6.363228652023197e-07, -6.524314503699838e-07, -6.689478260635222e-07, -6.858823155364714e-07, -7.032455033761201e-07, -7.210482421191711e-07, -7.393016590348786e-07, -7.580171630798956e-07, -7.772064520291904e-07, -7.968815197874736e-07, -8.170546638857137e-07, -8.377384931674279e-07, -8.589459356695498e-07, -8.806902467028008e-07, -9.029850171366075e-07, -9.258441818937617e-07, -9.492820286601103e-07, -9.733132068147346e-07, -9.97952736586179e-07, -1.023216018440362e-06, -1.049118842706526e-06, -1.075677399446486e-06, -1.102908288573809e-06, -1.130828530229187e-06, -1.159455575418438e-06, 
-1.188807316919826e-06, -1.218902100467486e-06, -1.249758736217951e-06, -1.281396510506965e-06, -1.313835197903895e-06, -1.347095073571312e-06, -1.381196925937444e-06, -1.416162069689427e-06, -1.452012359095484e-06, -1.488770201664332e-06, -1.526458572150383e-06, -1.565101026913486e-06, -1.60472171864216e-06, -1.645345411449574e-06, -1.686997496351616e-06, -1.729704007136858e-06, -1.773491636638191e-06, -1.818387753416406e-06, -1.864420418866116e-06, -1.911618404754663e-06, -1.960011211205075e-06, -2.00962908513419e-06, -2.06050303915757e-06, -2.112664870972977e-06, -2.16614718323448e-06, -2.220983403929764e-06, -2.277207807273161e-06, -2.334855535127653e-06, -2.393962618969139e-06, -2.454566002406669e-06, -2.516703564272848e-06, -2.580414142298652e-06, -2.645737557387627e-06, -2.712714638504565e-06, -2.781387248194135e-06, -2.851798308745644e-06, -2.923991829019972e-06, -2.998012931955707e-06, -3.073907882771581e-06, -3.151724117882716e-06, -3.231510274549008e-06, -3.313316221273877e-06, -3.397193088972638e-06, -3.483193302929838e-06, -3.571370615565485e-06, -3.661780140030862e-06, -3.754478384654622e-06, -3.849523288260934e-06, -3.946974256381683e-06, -4.046892198385204e-06, -4.149339565545088e-06, -4.254380390072426e-06, -4.362080325136248e-06, -4.472506685897018e-06, -4.585728491578683e-06, -4.701816508605932e-06, -4.820843294833216e-06, -4.942883244893441e-06, -5.068012636694634e-06, -5.196309679093424e-06, -5.327854560775539e-06, -5.462729500373381e-06, -5.601018797852391e-06, -5.742808887198146e-06, -5.888188390436995e-06, -6.037248173024354e-06, -6.19008140063479e-06, -6.346783597389814e-06, -6.507452705559503e-06, -6.672189146775276e-06, -6.841095884792373e-06, -7.014278489840649e-06, -7.191845204604567e-06, -7.373907011873201e-06, -7.560577703902509e-06, -7.751973953533599e-06, -7.948215387110913e-06, -8.149424659246223e-06, -8.355727529475168e-06, -8.567252940853846e-06, -8.784133100545272e-06, -9.006503562445241e-06, -9.234503311899768e-06, 
-9.468274852566999e-06, -9.70796429547738e-06, -9.953721450348583e-06, -1.020569991921135e-05, -1.046405719240526e-05, -1.072895474700443e-05, -1.100055814773415e-05, -1.127903715044235e-05, -1.156456580818939e-05, -1.185732258002369e-05, -1.215749044251021e-05, -1.246525700408185e-05, -1.278081462228524e-05, -1.310436052399403e-05, -1.343609692866481e-05, -1.377623117471271e-05, -1.412497584908555e-05, -1.448254892011752e-05, -1.484917387374538e-05, -1.522507985317232e-05, -1.561050180206653e-05, -1.600568061138415e-05, -1.641086326990816e-05, -1.682630301859731e-05, -1.72522595088415e-05, -1.768899896472239e-05, -1.813679434938061e-05, -1.859592553559364e-05, -1.906667948067043e-05, -1.954935040577259e-05, -2.004423997977376e-05, -2.055165750777192e-05, -2.10719201243728e-05, -2.16053529918646e-05, -2.215228950340797e-05, -2.271307149136828e-05, -2.328804944092006e-05, -2.387758270905716e-05, -2.448203974914502e-05, -2.510179834115591e-05, -2.573724582773018e-05, -2.638877935621136e-05, -2.705680612680628e-05, -2.774174364702453e-05, -2.844401999255705e-05, -2.916407407475583e-05, -2.99023559148823e-05, -3.065932692529524e-05, -3.1435460197754e-05, -3.223124079901676e-05, -3.304716607391855e-05, -3.388374595611819e-05, -3.474150328670772e-05, -3.562097414088374e-05, -3.652270816288415e-05, -3.744726890939965e-05, -3.839523420167375e-05, -3.936719648651163e-05, -4.03637632064227e-05, -4.138555717912793e-05, -4.243321698666918e-05, -4.350739737436216e-05, -4.460876965984347e-05, -4.573802215246563e-05, -4.689586058330234e-05, -4.808300854603213e-05, -4.930020794897539e-05, -5.054821947856658e-05, -5.182782307455094e-05, -5.313981841720206e-05, -5.448502542686401e-05, -5.586428477612947e-05, -5.727845841497337e-05, -5.87284301091697e-05, -6.021510599232695e-05, -6.17394151318863e-05, -6.330231010943523e-05, -6.490476761569917e-05, -6.654778906058111e-05, -6.823240119862987e-05, -6.995965677032652e-05, -7.173063515958882e-05, -7.354644306790443e-05, 
-7.540821520551036e-05, -7.73171150000518e-05, -7.927433532316133e-05, -8.128109923540998e-05, -8.333866075009503e-05, -8.544830561634046e-05, -8.761135212199747e-05, -8.9829151916845e-05, -9.210309085660347e-05, -9.443458986828551e-05, -9.682510583742489e-05, -9.92761325177339e-05, -0.0001017892014637564, -0.0001043658829870975, -0.000107007787136823, -0.0001097165647046406, -0.0001124939082554876, -0.0001153415531841651, -0.0001182612787986794, -0.0001212549094309589, -0.0001243243155756424, -0.0001274714150576442, -0.0001306981742292218, -0.0001340066091972922, -0.0001373987870817578, -0.0001408768273056236, -0.0001444429029177098, -0.0001480992419487783, -0.0001518481288019194, -0.0001556919056780563, -0.0001596329740374597, -0.0001636737960981755, -0.0001678168963722944, -0.0001720648632410239, -0.0001764203505695286, -0.0001808860793625542, -0.0001854648394618517, -0.000190159491286458, -0.0001949729676169162, -0.0001999082754245301, -0.0002049684977468036, -0.0002101567956102095, -0.0002154764100014934, -0.0002209306638887254, -0.0002265229642933515, -0.0002322568044145324, -0.0002381357658070796, -0.0002441635206143397, -0.0002503438338574082, -0.0002566805657820834, -0.0002631776742650188, -0.0002698392172805597, -0.0002766693554297816, -0.0002836723545333069, -0.0002908525882894811, -0.0002982145409995721, -0.0003057628103616562, -0.0003135021103349273, -0.0003214372740761897, -0.0003295732569503231, -0.0003379151396166904, -0.0003464681311931652, -0.0003552375724999271, -0.0003642289393849639, -0.0003734478461333344, -0.0003829000489622612, -0.0003925914496041304, -0.0004025280989798987, -0.0004127162009646798, -0.0004231621162482812, -0.0004338723662929203, -0.0004448536373902927, -0.0004561127848208907, -0.0004676568371178956, -0.0004794930004380202, -0.0004916286630428404, -0.0005040713998921115, -0.000516828977352829, -0.0005299093580267132, -0.0005433207056988405, -0.0005570713904105221, -0.0005711699936596101, -0.0005856253137316025, 
-0.0006004463711642026, -0.000615642414349178, -0.0006312229252748622, -0.0006471976254127428, -0.0006635764817513259, -0.0006803697129818547, -0.0006975877958385217, -0.0007152414715979632, -0.0007333417527414882, -0.000751899929784114, -0.0007709275782747015, -0.0007904365659713238, -0.0008104390601960736, -0.0008309475353741795, -0.000851974780761313, -0.0008735339083645884, -0.0008956383610609366, -0.000918301920918973, -0.0009415387177285175, -0.0009653632377428082, -0.000989790332639336, -0.001014835228704265, -0.001040513536245633, -0.00106684125924171, -0.001093834805229421, -0.001121510995439443, -0.001149887075183643, -0.001178980724501073, -0.001208810069068709, -0.001239393691384028, -0.001270750642225109, -0.001302900452395832, -0.001335863144762843, -0.001369659246590788, -0.001404309802184317, -0.001439836385843531, -0.001476261115139815, -0.00151360666452153, -0.00155189627925621, -0.001591153789716964, -0.001631403626024043, -0.001672670833046186, -0.001714981085773853, -0.001758360705071452, -0.001802836673818561, -0.001848436653448338, -0.001895189000893289, -0.001943122785949797, -0.001992267809067611, -0.002042654619577635, -0.00209431453436788, -0.002147279657017061, -0.002201582897396403, -0.00225725799175505, -0.002314339523292546, -0.002372862943238838, -0.002432864592445457, -0.002494381723507543, -0.002557452523422012, -0.002622116136800973, -0.002688412689646497, -0.00275638331370795, -0.002826070171426126, -0.002897516481486952, -0.002970766544990276, -0.003045865772257056, -0.003122860710278928, -0.0032017990708362, -0.003282729759289756, -0.003365702904067808, -0.003450769886863599, -0.003537983373555371, -0.003627397345870272, -0.003719067133806299, -0.003813049448828006, -0.003909402417856986, -0.004008185618071303, -0.004109460112533974, -0.004213288486667597, -0.004319734885593412, -0.004428865052356293, -0.004540746367051144, -0.004655447886873873, -0.004773040387113907, -0.004893596403110814, -0.005017190273194185, 
-0.005143898182628848, -0.005273798208584227, -0.005406970366153877, -0.005543496655441905, -0.005683461109742378, -0.005826949844832416, -0.005974051109403168, -0.006124855336647978, -0.006279455197038683, -0.006437945652305156, -0.00660042401064872, -0.006766989983210039, -0.006937745741817812, -0.00711279597804274, -0.007292247963580757, -0.007476211611993732, -0.007664799541829103, -0.007858127141147676, -0.008056312633484669, -0.008259477145268538, -0.008467744774725516, -0.008681242662296614, -0.00890010106259151, -0.009124453417908756, -0.009354436433344625, -0.009590190153524853, -0.009831858040975253, -0.01007958705616936, -0.01033352773926949, -0.01059383429359572, -0.01086066467084371, -0.01113418065807898, -0.01141454796653625, -0.01170193632224391, -0.01199651955850386, -0.01229847571024992, -0.01260798711030696, -0.01292524048757756, -0.01325042706717828, -0.01358374267254704, -0.01392538782954495, -0.01427556787257236, -0.0146344930527198, -0.01500237864797174, -0.01537944507548256, -0.01576591800593927, -0.01616202848002895, -0.01656801302702106, -0.01698411378548016, -0.01741057862611849, -0.01784766127679531, -0.01829562144967176, -0.0187547249705257, -0.01922524391022572, -0.01970745671836885, -0.02020164835907366, -0.02070811044892585, -0.02122714139706799, -0.02175904654741717, -0.02230413832300052, -0.0228627363723817, -0.02343516771816295, -0.02402176690752569, -0.0246228761647863, -0.02523884554592244, -0.02587003309503186, -0.0265168050026723, -0.02717953576603222, -0.02785860835086945, -0.02855441435515309, -0.02926735417433178, -0.02999783716815427, -0.03074628182894562, -0.03151311595124601, -0.03229877680270896, -0.03310371129613805, -0.03392837616254345, -0.03477323812507877, -0.03563877407371639, -0.03652547124050028, -0.0374338273752112, -0.03836435092125855, -0.03931756119160502, -0.04029398854451634, -0.04129417455890846, -0.04231867220905563, -0.0433680460383996, -0.04444287233218965, -0.04554373928866012, -0.04667124718843484, 
-0.04782600856182664, -0.04900864835368444, -0.05021980408540837, -0.0514601260137394, -0.05273027728590527, -0.05403093409066988, -0.05536278580482022, -0.05672653513458608, -0.05812289825146557, -0.0595526049218963, -0.06101639863017932, -0.0625150366940353, -0.06404929037213097, -0.06561994496288055, -0.06722779989379322, -0.06887366880059283, -0.07055837959529725, -0.07228277452240316, -0.07404771020227949, -0.07585405766082012, -0.0777027023443691, -0.07959454411887158, -0.08153049725215808, -0.08351149037821971, -0.08553846644226312, -0.08761238262529533, -0.08973421024691201, -0.09190493464491499, -0.09412555503031057, -0.09639708431618396, -0.09872054891887078, -0.1010969885297794, -0.1035274558561499, -0.1060130163289538, -0.1085547477760733, -0.1111537400588096, -0.1138110946697019, -0.1165279242895491, -0.1193053523014446, -0.1221445122595566, -0.1250465473102871, -0.1280126095633667, -0.1310438594103487, -0.1341414647878731, -0.1373066003829829, -0.1405404467776872, -0.1438441895298588, -0.1472190181874801, -0.1506661252331453, -0.1541867049556369, -0.1577819522453107, -0.1614530613099183, -0.165201224307426, -0.1690276298922879, -0.1729334616715545, -0.1769198965671149, -0.1809881030803, -0.1851392394549913, -0.1893744517353315, -0.1936948717140527, -0.1981016147674049, -0.2025957775726089, -0.2071784357037368, -0.211850641101887, -0.2166134194155138, -0.2214677672067828, -0.2264146490198144, -0.231454994306746, -0.2365896942075556, -0.2418195981796855, -0.2471455104735979, -0.2525681864504806, -0.2580883287385093, -0.2637065832241896, -0.2694235348755477, -0.2752397033941529, -0.2811555386932117, -0.2871714161993128, -0.2932876319757282, -0.2995043976655696, -0.3058218352535768, -0.3122399716457542, -0.3187587330666876, -0.3253779392749243, -0.3320972975975134, -0.3389163967855388, -0.3458347006932584, -0.3528515417844101, -0.3599661144701458, -0.3671774682841596, -0.3744845009017048, -0.381885951010402, -0.3893803910421254, -0.396966219776642, 
-0.4046416548292489, -0.4124047250363149, -0.4202532627543648, -0.4281848960902903, -0.4361970410822338, -0.4442868938528593, -0.4524514227589987, -0.4606873605640259, -0.4689911966618984, -0.4773591693844271, -0.4857872584261583, -0.4942711774242056, -0.5028063667333829, -0.511387986440263, -0.5200109096630298, -0.5286697161874694, -0.5373586864929923, -0.5460717962261854, -0.5548027111831652, -0.5635447828657552, -0.5722910446803782, -0.581034208852448, -0.5897666641328687, -0.5984804743771865, -0.607167378081652, -0.6158187889642208, -0.6244257976820708, -0.632979174780558, -0.6414693749717644, -0.6498865428435497, -0.6582205201025584, -0.6664608544567118, -0.6745968102441849, -0.6826173809168985, -0.6905113034867345, -0.6982670750421922, -0.7058729714417226, -0.7133170682875154, -0.7205872642798778, -0.7276713070474228, -0.734556821541933, -0.7412313410788467, -0.7476823410945735, -0.7538972756802894, -0.7598636169381531, -0.7655688971898745, -0.7710007540492138, -0.7761469783488433, -0.780995564888155, -0.7855347659416454, -0.7897531474373758, -0.7936396476815373, -0.7971836384680389, -0.8003749883713548, -0.8032041279762494, -0.8056621167495175, -0.8077407112063799, -0.8094324339676324, -0.8107306432430683, -0.811629602212204, -0.8121245477049581, -0.8122117575129773, -0.8118886155869818, -0.8111536742972688, -0.8100067128537929, -0.8084487908997345, -0.8064822962088619, -0.8041109853332586, -0.8013400159651561, -0.798175969695984, -0.7946268637787889, -0.7907021504284736, -0.7864127021299507, -0.7817707813691794, -0.7767899931588297, -0.7714852187014298, -0.7658725285213536, -0.759969073406041, -0.7537929515298505, -0.747363050194718, -0.740698860714185, -0.7338202650957105, -0.7267472933444982, -0.7194998504255437, -0.7120974121828922, -0.7045586898314419, -0.69690126301141, -0.6891411818330789, -0.6812925388436344, -0.6733670124225098, -0.6653733837594097, -0.657317030292422, -0.6491993992833659, -0.6410192506242824, -0.6327792453464068, 
-0.6244834109604144, -0.6161357730602252, -0.6077403526521449, -0.5993011635531702, -0.5908222098573821, -0.5823074834690507, -0.5737609617007872, -0.565186604934832, -0.5565883537818711, -0.547970122326802, -0.5393358062440837, -0.5306892760889922, -0.5220343746599444, -0.5133749149870808, -0.5047146783657318, -0.4960574124317736, -0.4874068292759368, -0.4787666035941074, -0.4701403708707156, -0.4615317255923738, -0.4529442194890515, -0.4443813598001732, -0.4358466075631984, -0.4273433759224071, -0.4188750284557953, -0.4104448775182264, -0.4020561825991869, -0.3937121486937582, -0.3854159246856771, -0.3771706017416292, -0.3689792117162141, -0.3608447255673028, -0.3527700517818281, -0.344758034812348, -0.3368114535250331, -0.3289330196600505, -0.3211253763056177, -0.3133910963873244, -0.3057326811746118, -0.2981525588066063, -0.2906530828397924, -0.2832365308202845, -0.2759051028837315, -0.2686609203861371, -0.2615060245691077, -0.2544423752632785, -0.2474718496338415, -0.2405962409722999, -0.2338172575387188, -0.2271365214588744, -0.2205555676808214, -0.2140758429954679, -0.2076987051258135, -0.2014254218895324, -0.1952571704395785, -0.1891950365874712, -0.1832400142138528, -0.1773930047708301, -0.1716548168804979, -0.1660261660338894, -0.1605076743944365, -0.1550998707098136, -0.1498031903358113, -0.144617975375633, -0.1395444749377199, -0.1345828455149085, -0.1297331514873855, -0.1249953657515597, -0.1203693704765891, -0.1158549579899096, -0.1114518317927015, -0.1071596077057996, -0.1029778151461122, -0.09890589853316631, -0.09494321882492647, -0.09108905518157533, -0.08734260675546218, -0.08370299460495753, -0.08016926372947396, -0.07674038522244075, -0.0734152585385578, -0.07019271387119187, -0.06707151463533466, -0.06405036005110748, -0.0611278878223776, -0.05830267690465472, -0.05557325035605375, -0.0529380782647561, -0.05039558074606952, -0.0479441310018818, -0.04558205843503018, -0.04330765181086139, -0.04111916245804734, -0.03901480750054195, 
-0.03699277311242089, -0.03505121778723947, -0.03318827561347215, -0.031402059547564, -0.02969066467612917, -0.02805217145887276, -0.02648464894389331, -0.02498615794713953, -0.02355475418794972, -0.02218849137279252, -0.02088542421955285, -0.01964361141496637, -0.01846111849809715, -0.01733602066307621, -0.01626640547466946, -0.01525037549062197, -0.01428605078512924, -0.01337157136821058, -0.0125050994962062, -0.01168482186908215, -0.01090895171070481, -0.01017573072873681, -0.009483430951304503, -0.008830356438093137, -0.008214844864034399, -0.007635268974260346, -0.007090037909505716, -0.006577598401642642, -0.006096435839527107, -0.00564507520582106, -0.005222081885926212, -0.00482606235062254, -0.00445566471444383, -0.004109579172242926, -0.003786538316797846, -0.003485317340685231, -0.003204734125998542, -0.002943649225812433, -0.002700965741592134, -0.002475629101015151, -0.00226662674091211, -0.002072987700243368, -0.001893782128207589, -0.001728120712727825, -0.001575154034679742, -0.001434071853315564, -0.001304102328396845, -0.001184511184579395, -0.001074600823595884, -0.0009737093897563306, -0.0008812097942350972, -0.0007965087035364036, -0.0007190454974298867, -0.0006482912015249603, -0.000583747399511216, -0.0005249451299395974, -0.0004714437721540233, -0.0004228299259772377, -0.0003787162893468685, -0.0003387405379686967, -0.0003025642108074934, -0.0002698716049915851, -0.0002403686834628298, -0.0002137819983999682, -0.0001898576332901107, -0.0001683601661484557, -0.0001490719991512378, -0.0001317950872463381, -0.0001163433364479253, -0.0001025450063163189, -9.02422029768887e-05, -7.928999469078057e-05, -6.955556286139526e-05, -6.091738881740264e-05, -5.326447657175263e-05, -4.649561156242599e-05, -4.051865524147743e-05, -3.524987523225568e-05, -3.061331063956861e-05, -2.654017197312625e-05, -2.296827503074447e-05, -1.984150798473004e-05, -1.710933082270058e-05, -1.472630621284605e-05, -1.265166079325405e-05, -1.084887582526173e-05, 
-9.285306101666021e-06, -7.931825961758125e-06, -6.762501236231922e-06, -5.754285925720074e-06, -4.886742406647079e-06, -4.141783956889706e-06, -3.503438401029892e-06, -2.957631690438113e-06, -2.491990256756258e-06, -2.09566100836695e-06, -1.75914787807726e-06, -1.474163876619038e-06, -1.233497661025521e-06, -1.030893690150978e-06, -8.609451126471435e-07, -7.189986172486036e-07, -5.990630185075549e-07, -4.9796179544284e-07, -4.129387416207082e-07, -3.416081316283891e-07, -2.819099748860501e-07, -2.320699459958092e-07, -1.905636005748874e-07, -1.560845059568777e-07, -1.275159370204978e-07, -1.039058084891634e-07, -8.444453612794256e-08, -6.84455401592206e-08, -5.532812475831257e-08, -4.460248753057848e-08, -3.58566322884793e-08, -2.874497713712037e-08, -2.297846775629161e-08, -1.831602277045284e-08, -1.455715417737996e-08, -1.15356209290709e-08, -9.113987907264399e-09, -7.178975705857782e-09, -5.637498830426794e-09, -4.41330116780289e-09, -3.444107891652628e-09, -2.679222385777145e-09, -2.077505323738002e-09, -1.60568078426686e-09, -1.236921252913845e-09, -9.496696099767535e-10, -7.26661782664659e-10, -5.541186958528563e-10, -4.210805405348626e-10, -3.18860241073442e-10, -2.405963890162585e-10, -1.808888680790018e-10, -1.355029651661919e-10, -1.011299867544535e-10, -7.51943166910505e-11, -5.569849577609578e-11, -4.109930790202333e-11, -3.020905172049861e-11, -2.211718775300229e-11, -1.612840286293853e-11, -1.171384883874087e-11, -8.47290888327892e-12, -6.103342215452146e-12, -4.378067847975562e-12, -3.127187096623781e-12, -2.22412196014478e-12, -1.57497308836478e-12, -1.110385135152297e-12, -7.793561390598089e-13, -5.445473591336367e-13, -3.787459287544431e-13, -2.62209153630793e-13, -1.806799265302854e-13, -1.239105963540041e-13, -8.457021914349514e-14, -5.743949155709325e-14, -3.88205035787525e-14, -2.610611093311948e-14, -1.746732017601895e-14, -1.16274847262649e-14, -7.700033760953261e-15, -5.072457814074903e-15, -3.323796856065147e-15, -2.166265117698634e-15, 
-1.404172199878735e-15, -9.051716007123635e-16, -5.802471747469482e-16, -3.698589358812096e-16, -2.344062463002364e-16, -1.477001225053207e-16, -9.252079911146059e-17, -5.761194983809264e-17, -3.565889510775906e-17, -2.193675184614836e-17, -1.341196777847922e-17, -8.148817797008948e-18, -4.919765510446304e-18, -2.951258313342848e-18, -1.758927604280226e-18, -1.041432398327571e-18, -6.125204490744025e-19, -3.57832717890101e-19, -2.076216074388899e-19, -1.196356834661463e-19, -6.845525178869377e-20, -3.88930814082913e-20, -2.193909390519006e-20, -1.228588649359912e-20, -6.829611775946059e-21, -3.768313036870053e-21, -2.063567072325681e-21, -1.121423308193198e-21, -6.047252918307168e-22, -3.235503840775468e-22, -1.717426162784987e-22, -9.043222032011723e-23, -4.723167298939735e-23, -2.446605673454227e-23, -1.2568113150091e-23, -6.401857642999766e-24, -3.233147886241955e-24, -1.618758169165256e-24, -8.033926478576522e-25, -3.951984615263131e-25, -1.926617862387418e-25, -9.307213725084722e-26, -4.454897976646098e-26, -2.112512986772725e-26, -9.923240246111165e-27, -4.61687823203028e-27, -2.127314906843735e-27, -9.706230249003286e-28, -4.384813906795239e-28, -1.961007109408635e-28, -8.681196059135082e-29, -3.80361572434219e-29, -1.649202386564016e-29, -7.075455347039459e-30, -3.003171293686619e-30, -1.260934437910244e-30, -5.236393645067952e-31, -2.150499707617745e-31, -8.732789540607556e-32, -3.505996886252726e-32, -1.391402025517801e-32, -5.457744141662161e-33, -2.115578855059142e-33, -8.102817041692386e-34, -3.065971184118605e-34, -1.145932277544116e-34, -4.230010798417601e-35, -1.541869391124105e-35, -5.548918892040305e-36, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2P", + "angular_momentum": 1 + } + ], + "total_charge_density": [ + 2.056040731095226e-07, 2.108089651139438e-07, 2.161456195925846e-07, 2.21617372128658e-07, 2.272276427460846e-07, 2.329799380471176e-07, 2.38877853404086e-07, 2.449250752066204e-07, 2.511253831657677e-07, 2.574826526764359e-07, 
2.640008572396422e-07, 2.706840709460827e-07, 2.775364710225729e-07, 2.845623404429526e-07, 2.917660706050872e-07, 2.991521640756329e-07, 3.067252374042922e-07, 3.14490024009309e-07, 3.224513771360105e-07, 3.306142728902482e-07, 3.389838133486257e-07, 3.475652297474671e-07, 3.563638857525114e-07, 3.653852808113817e-07, 3.746350535909244e-07, 3.841189855015566e-07, 3.938430043108455e-07, 4.038131878485563e-07, 4.140357678054965e-07, 4.245171336285338e-07, 4.352638365142068e-07, 4.462825935034444e-07, 4.575802916799372e-07, 4.691639924747942e-07, 4.810409360801752e-07, 4.932185459746465e-07, 5.057044335631056e-07, 5.18506402934161e-07, 5.316324557379487e-07, 5.450907961874327e-07, 5.588898361863064e-07, 5.730382005867172e-07, 5.875447325800858e-07, 6.02418499224397e-07, 6.176687971114167e-07, 6.333051581773668e-07, 6.493373556607098e-07, 6.657754102107489e-07, 6.82629596150869e-07, 6.999104479003373e-07, 7.176287665586651e-07, 7.357956266566595e-07, 7.544223830783782e-07, 7.735206781583133e-07, 7.931024489582482e-07, 8.131799347283153e-07, 8.33765684556946e-07, 8.548725652144748e-07, 8.76513769195303e-07, 8.987028229636618e-07, 9.214535954081004e-07, 9.447803065100197e-07, 9.686975362316416e-07, 9.932202336289773e-07, 1.018363726195506e-06, 1.044143729442368e-06, 1.070576356721106e-06, 1.097678129295051e-06, 1.125465986665684e-06, 1.153957297160401e-06, 1.183169868788316e-06, 1.213121960370869e-06, 1.243832292954231e-06, 1.275320061510585e-06, 1.307604946935666e-06, 1.340707128349978e-06, 1.374647295711457e-06, 1.409446662747404e-06, 1.445126980213799e-06, 1.481710549490287e-06, 1.519220236519304e-06, 1.557679486098093e-06, 1.597112336532523e-06, 1.637543434661859e-06, 1.678998051263917e-06, 1.72150209685017e-06, 1.765082137860733e-06, 1.809765413269324e-06, 1.85557985160858e-06, 1.90255408842639e-06, 1.950717484184101e-06, 2.000100142607866e-06, 2.050732929504542e-06, 2.102647492053905e-06, 2.155876278589292e-06, 2.210452558878937e-06, 2.266410444920802e-06, 
2.323784912263774e-06, 2.38261182186866e-06, 2.442927942522582e-06, 2.50477097382079e-06, 2.568179569730285e-06, 2.633193362749949e-06, 2.699852988682306e-06, 2.768200112032399e-06, 2.838277452049606e-06, 2.910128809428763e-06, 2.983799093687207e-06, 3.059334351234886e-06, 3.136781794155088e-06, 3.216189829713728e-06, 3.297608090615711e-06, 3.381087466027236e-06, 3.466680133383421e-06, 3.554439591001203e-06, 3.644420691517768e-06, 3.736679676175562e-06, 3.831274209975208e-06, 3.928263417718323e-06, 4.027707920962827e-06, 4.129669875913703e-06, 4.234213012273079e-06, 4.341402673073729e-06, 4.451305855521057e-06, 4.563991252868963e-06, 4.679529297355813e-06, 4.797992204227387e-06, 4.919454016874279e-06, 5.043990653111964e-06, 5.171679952632501e-06, 5.302601725657424e-06, 5.436837802822405e-06, 5.574472086324699e-06, 5.715590602365472e-06, 5.860281554919718e-06, 6.008635380867349e-06, 6.160744806520018e-06, 6.316704905578947e-06, 6.476613158559962e-06, 6.640569513722974e-06, 6.808676449543818e-06, 6.981039038767731e-06, 7.157765014084313e-06, 7.338964835465117e-06, 7.524751759205955e-06, 7.71524190871702e-06, 7.91055434710513e-06, 8.110811151593484e-06, 8.316137489825334e-06, 8.52666169809946e-06, 8.74251536158609e-06, 8.963833396573678e-06, 9.190754134797761e-06, 9.423419409904704e-06, 9.661974646104381e-06, 9.906568949067061e-06, 1.015735519912155e-05, 1.041449014681271e-05, 1.067813451087805e-05, 1.09484530787048e-05, 1.122561480932998e-05, 1.150979293904823e-05, 1.180116508969304e-05, 1.209991337965932e-05, 1.240622453773675e-05, 1.272029001982451e-05, 1.304230612860144e-05, 1.337247413622515e-05, 1.37110004101378e-05, 1.405809654205679e-05, 1.441397948023066e-05, 1.477887166504376e-05, 1.515300116805343e-05, 1.553660183454738e-05, 1.592991342971017e-05, 1.633318178848958e-05, 1.674665896925794e-05, 1.717060341136274e-05, 1.760528009666638e-05, 1.80509607151756e-05, 1.85079238348635e-05, 1.897645507579175e-05, 1.945684728864017e-05, 1.994940073775659e-05, 
2.04544232888409e-05, 2.097223060138015e-05, 2.150314632595639e-05, 2.204750230654885e-05, 2.260563878795823e-05, 2.317790462848248e-05, 2.376465751797619e-05, 2.436626420143169e-05, 2.498310070821941e-05, 2.561555258713277e-05, 2.626401514738361e-05, 2.692889370569832e-05, 2.76106038396709e-05, 2.830957164752916e-05, 2.902623401447815e-05, 2.976103888578674e-05, 3.051444554678734e-05, 3.128692490996598e-05, 3.207895980931907e-05, 3.289104530216401e-05, 3.372368897859046e-05, 3.457741127874569e-05, 3.545274581815422e-05, 3.635023972127259e-05, 3.727045396348978e-05, 3.821396372178666e-05, 3.918135873427231e-05, 4.017324366882509e-05, 4.119023850106517e-05, 4.223297890189791e-05, 4.330211663486922e-05, 4.439831996358035e-05, 4.552227406941974e-05, 4.667468147986933e-05, 4.785626250765655e-05, 4.906775570102472e-05, 5.030991830540322e-05, 5.158352673676848e-05, 5.288937706698801e-05, 5.422828552145456e-05, 5.560108898931993e-05, 5.700864554664635e-05, 5.84518349928063e-05, 5.99315594004617e-05, 6.144874367946935e-05, 6.300433615506529e-05, 6.459930916068694e-05, 6.623465964580825e-05, 6.791140979916309e-05, 6.963060768775051e-05, 7.139332791202021e-05, 7.32006722776457e-05, 7.505377048431131e-05, 7.695378083193606e-05, 7.890189094478252e-05, 8.08993185139011e-05, 8.294731205837184e-05, 8.504715170582668e-05, 8.720014999273091e-05, 8.940765268493345e-05, 9.167103961899371e-05, 9.399172556481271e-05, 9.637116111010932e-05, 9.881083356729311e-05, 0.0001013122679033024, 0.0001038770276929891, 0.0001065067160966459, 0.0001092029768622891, 0.0001119674953533218, 0.0001148019996022228, 0.0001177082613909195, 0.0001206880973585204, 0.0001237433701371006, 0.0001268759895162518, 0.0001300879136371243, 0.0001333811502167096, 0.0001367577578031277, 0.000140219847062708, 0.0001437695820996646, 0.0001474091818091975, 0.0001511409212648619, 0.0001549671331410761, 0.0001588902091716588, 0.0001629126016453077, 0.0001670368249389551, 0.0001712654570899632, 0.0001756011414081394, 
0.0001800465881285837, 0.0001846045761064021, 0.0001892779545543466, 0.0001940696448244709, 0.000198982642234916, 0.0002040200179429717, 0.0002091849208655834, 0.0002144805796485108, 0.0002199103046853706, 0.000225477490187825, 0.0002311856163082172, 0.0002370382513159795, 0.0002430390538291794, 0.0002491917751026025, 0.0002555002613738014, 0.0002619684562685879, 0.000268600403267465, 0.0002754002482345562, 0.0002823722420106086, 0.0002895207430716951, 0.0002968502202552878, 0.0003043652555554038, 0.0003120705469885807, 0.0003199709115324775, 0.0003280712881389387, 0.0003363767408234185, 0.0003448924618326933, 0.0003536237748928557, 0.000362576138539625, 0.0003717551495330585, 0.0003811665463588119, 0.0003908162128181398, 0.0004007101817088919, 0.0004108546385998123, 0.0004212559257005073, 0.0004319205458295184, 0.0004428551664829789, 0.0004540666240064193, 0.0004655619278723321, 0.0004773482650661822, 0.0004894330045836217, 0.0005018237020417248, 0.0005145281044071444, 0.00052755415484416, 0.0005409099976856539, 0.0005546039835301528, 0.000568644674468118, 0.0005830408494407873, 0.0005978015097349236, 0.000612935884616925, 0.0006284534371098461, 0.0006443638699169496, 0.0006606771314955236, 0.0006774034222847787, 0.000694553201091748, 0.0007121371916391911, 0.0007301663892796443, 0.0007486520678798262, 0.0007676057868797316, 0.000787039398530875, 0.0008069650553182064, 0.0008273952175704134, 0.0008483426612633688, 0.0008698204860216636, 0.0008918421233232541, 0.0009144213449123872, 0.0009375722714261301, 0.0009613093812399165, 0.000985647519537706, 0.001010601907612474, 0.001036188152402879, 0.00106242225627216, 0.001089320627035412, 0.001116900088241569, 0.001145177889716611, 0.001174171718374614, 0.001203899709303516, 0.001234380457132578, 0.001265633027688731, 0.001297676969949195, 0.001330532328297889, 0.001364219655093447, 0.001398760023556741, 0.001434175040986089, 0.001470486862308537, 0.001507718203975749, 0.001545892358213371, 0.001585033207632851, 
0.001625165240215015, 0.001666313564674894, 0.001708503926217523, 0.001751762722694768, 0.001796117021173403, 0.00184159457492497, 0.001888223840848237, 0.001936033997335287, 0.001985054962592672, 0.002035317413429224, 0.002086852804522525, 0.002139693388176309, 0.002193872234581326, 0.002249423252592685, 0.002306381211036846, 0.002364781760561885, 0.002424661456045014, 0.002486057779571597, 0.002549009164000427, 0.002613555017130293, 0.002679735746483302, 0.002747592784720849, 0.002817168615708456, 0.00288850680124627, 0.002961652008482301, 0.003036650038026019, 0.003113547852780351, 0.003192393607510649, 0.003273236679169539, 0.003356127697997311, 0.003441118579417749, 0.003528262556750019, 0.00361761421475772, 0.003709229524056666, 0.003803165876403739, 0.003899482120889536, 0.003998238601058239, 0.004099497192978798, 0.004203321344291962, 0.004309776114258652, 0.004418928214835523, 0.004530846052804486, 0.004645599772983576, 0.004763261302547199, 0.004883904396484803, 0.005007604684227437, 0.005134439717472761, 0.00526448901923973, 0.005397834134184931, 0.005534558680213717, 0.005674748401419814, 0.005818491222388178, 0.005965877303896855, 0.006116999100054327, 0.006271951416910154, 0.006430831472577399, 0.006593738958906638, 0.006760776104752271, 0.006932047740872934, 0.007107661366509191, 0.00728772721768252, 0.007472358337261066, 0.007661670646838781, 0.007855783020475748, 0.008054817360349029, 0.008258898674364547, 0.008468155155781861, 0.008682718264905413, 0.00890272281289676, 0.00912830704776453, 0.009359612742589729, 0.009596785286046038, 0.009839973775276331, 0.01008933111118804, 0.0103450140962323, 0.01060718353473308, 0.01087600433583463, 0.01115164561913754, 0.01143428082309527, 0.01172408781624571, 0.01202124901135374, 0.01232595148254344, 0.01263838708550034, 0.01295875258082676, 0.0132872497606356, 0.01362408557847, 0.0139694722826393, 0.01432362755306405, 0.0146867746417251, 0.01505914251681544, 0.01544096601069543, 0.01583248597175517, 
0.01623394942029131, 0.01664560970850744, 0.01706772668475212, 0.01750056686211009, 0.01794440359146689, 0.01839951723917004, 0.01886619536941333, 0.01934473293147521, 0.01983543245194527, 0.02033860423207718, 0.02085456655041064, 0.02138364587080831, 0.02192617705605934, 0.0224825035872042, 0.02305297778874092, 0.02363796105987759, 0.02423782411199996, 0.02485294721252967, 0.02548372043535214, 0.02613054391799947, 0.02679382812577926, 0.02747399412304513, 0.028171473851812, 0.02888671041792378, 0.02962015838498842, 0.0303722840763016, 0.03114356588498589, 0.03193449459258132, 0.03274557369632796, 0.03357731974539008, 0.03443026268627854, 0.03530494621773501, 0.03620192815535153, 0.0371217808062053, 0.0380650913537977, 0.03903246225359679, 0.04002451163948803, 0.04104187374145243, 0.04208519931479575, 0.04315515608126592, 0.04425242918240518, 0.04537772164549185, 0.04653175486244154, 0.04771526908204511, 0.04892902391593389, 0.05017379885867476, 0.05145039382240817, 0.05275962968645739, 0.05410234886234763, 0.05547941587468828, 0.05689171795838549, 0.05834016567266398, 0.0598256935323954, 0.06134926065724147, 0.06291185143913673, 0.06451447622865338, 0.0661581720408011, 0.06784400328083917, 0.06957306249068662, 0.07134647111653943, 0.07316538029831895, 0.07503097168159192, 0.07694445825262593, 0.07890708519725678, 0.08092013078426782, 0.08298490727399976, 0.08510276185292578, 0.08727507759495431, 0.08950327445023404, 0.09178881026226338, 0.09413318181412292, 0.09653792590467267, 0.09900462045557892, 0.101534885650054, 0.1041303851042168, 0.1067928270720028, 0.1095239656845737, 0.1123256022252012, 0.1151995864406182, 0.1181478178898551, 0.1211722473315995, 0.1242748781511375, 0.1274577678279589, 0.1307230294451258, 0.1340728332415257, 0.1375094082081479, 0.1410350437295412, 0.1446520912716263, 0.1483629661170559, 0.1521701491493234, 0.1560761886868411, 0.160083702368212, 0.1641953790899377, 0.1684139809978006, 0.1727423455331723, 0.1771833875354945, 
0.1817401014021779, 0.1864155633071603, 0.1912129334793576, 0.1961354585422157, 0.2011864739155707, 0.2063694062809771, 0.2116877761116585, 0.2171452002681756, 0.2227453946608773, 0.2284921769801416, 0.2343894694953532, 0.2404413019234992, 0.2466518143681808, 0.2530252603297458, 0.2595660097871545, 0.2662785523520587, 0.2731675004954649, 0.280237592847191, 0.2874936975681693, 0.2949408157954818, 0.3025840851597888, 0.3104287833746254, 0.3184803318967754, 0.3267442996566772, 0.3352264068575243, 0.3439325288414002, 0.3528687000204533, 0.3620411178707127, 0.3714561469857585, 0.3811203231869892, 0.3910403576867466, 0.4012231413000273, 0.4116757486999321, 0.4224054427113749, 0.4334196786369102, 0.4447261086077837, 0.456332585952562, 0.4682471695748036, 0.4804781283303615, 0.4930339453938921, 0.5059233226031091, 0.5191551847681852, 0.5327386839324824, 0.5466832035694974, 0.5609983626995221, 0.5756940199080056, 0.590780277246065, 0.6062674839918438, 0.6221662402496524, 0.6384874003618932, 0.655242076106716, 0.6724416396522181, 0.6900977262356716, 0.708222236533842, 0.7268273386878985, 0.7459254699436567, 0.7655293378650821, 0.7856519210759085, 0.8063064694810635, 0.8275065039162612, 0.8492658151705736, 0.87159846232319, 0.8945187703316572, 0.9180413268049443, 0.9421809778904867, 0.966952823200002, 0.9923722096944481, 1.018454724443753, 1.04521618617223, 1.07267263549563, 1.100840323750678, 1.129735700312898, 1.159375398293107, 1.189776218497763, 1.22095511153289, 1.252929157925904, 1.285715546134321, 1.31933154830494, 1.353794493641856, 1.38912173923658, 1.425330638208569, 1.462438504999921, 1.500462577663557, 1.539419976980361, 1.57932766223732, 1.620202383495777, 1.662060630176902, 1.704918575789967, 1.748792018628694, 1.793696318261543, 1.839646327643601, 1.886656320681073, 1.934739915083991, 1.983909990349359, 2.034178600725352, 2.08555688301766, 2.138054959112266, 2.191681833104149, 2.246445282940139, 2.302351746505614, 2.359406202109563, 2.417612043351547, 
2.47697094838638, 2.537482743639669, 2.599145262068899, 2.661954196111072, 2.725902945510139, 2.7909824602744, 2.857181079077481, 2.924484363485955, 2.992874928471976, 3.06233226975229, 3.132832588583521, 3.204348614740203, 3.276849428505144, 3.350300282611594, 3.424662425194477, 3.499892924931379, 3.575944499684896, 3.652765350094582, 3.730298999708552, 3.808484143392306, 3.887254505902486, 3.966538712667156, 4.046260174968719, 4.12633699187956, 4.206681871452689, 4.287202073816161, 4.367799378959886, 4.448370082132496, 4.528805019880476, 4.608989629859643, 4.688804047623089, 4.768123243637865, 4.846817203797936, 4.924751156677289, 5.001785850699967, 5.077777884283887, 5.152580091837448, 5.226041988243292, 5.298010274143779, 5.368329403940823, 5.43684221792782, 5.503390639377095, 5.567816436702147, 5.629962049992202, 5.689671480269668, 5.746791238740428, 5.801171352087792, 5.852666418497205, 5.901136707587557, 5.946449295765959, 5.988479226716315, 6.02711068478438, 6.062238166941071, 6.093767636804852, 6.121617641902255, 6.145720372966222, 6.166022641647023, 6.182486750577837, 6.195091227343539, 6.203831391602566, 6.208719722472857, 6.209785991389817, 6.20707712406232, 6.200656753990186, 6.190604429370418, 6.177014435229754, 6.159994193406201, 6.139662204701203, 6.116145500282984, 6.089576573391939, 6.060089767735831, 6.027817105819235, 5.992883548977288, 5.955401691216191, 5.915465901223221, 5.873145941194219, 5.828480107500397, 5.781467956699865, 5.732062700961773, 5.680163379513833, 5.625656122547302, 5.568579951087981, 5.509020552982427, 5.447067021186087, 5.38281161647377, 5.316349523601776, 5.247778601869502, 5.177199131038444, 5.104713553571631, 5.03042621415677, 4.954443097471706, 4.87687156514148, 4.797820092822387, 4.717398008330219, 4.635715231707476, 4.552882018098121, 4.469008704268472, 4.384205459579603, 4.298582042182447, 4.212247561150013, 4.125310226372852, 4.037877065779705, 3.950053730261275, 3.861944388580721, 3.773651473454112, 
3.685275510556256, 3.596914936819679, 3.508665931960735, 3.420622260040689, 3.332875121808815, 3.245513017833124, 3.158621622661301, 3.072283670098174, 2.986578849657339, 2.901583714199598, 2.817371598727253, 2.734012550261571, 2.651573268690724, 2.570117058437653, 2.48970379076135, 2.410389876471423, 2.332228248804346, 2.255268356180661, 2.179556164535605, 2.105134168891173, 2.032041413815583, 1.960313522396379, 1.889982733336116, 1.82107794576433, 1.753624771347291, 1.687645593265749, 1.623159631623007, 1.560183014839569, 1.498728856586219, 1.438807337805256, 1.380425793369292, 1.323588802928561, 1.268298285500898, 1.214553597363399, 1.162351632811158, 1.111686927356291, 1.0625517629496, 1.014936274817655, 0.9688285595195723, 0.9242147838403698, 0.8810792941513074, 0.8394047258819797, 0.7991721127640772, 0.7603609955224995, 0.7229495297058635, 0.6869145923652722, 0.6522318873074019, 0.6188760486654731, 0.5868207425493723, 0.5560387665540133, 0.5265021469229091, 0.4981822331817501, 0.4710497900745266, 0.4450750866522638, 0.4202279823817465, 0.3964780101585839, 0.3737944561255738, 0.3521464362134974, 0.331502969337165, 0.311833047194684, 0.2931057006324984, 0.2752900625527074, 0.2583554273524742, 0.2422713068979683, 0.2270074830471962, 0.212534056747278, 0.1988214937421754, 0.1858406669365829, 0.1735628954706451, 0.1619599805683555, 0.1510042382299399, 0.1406685288452294, 0.1309262838110062, 0.1217515292405715, 0.1131189068583613, 0.1050036921763473, 0.09738181005223384, 0.09022984773212715, 0.08352506548244046, 0.07724540491734418, 0.07136949512910636, 0.06587665672923489, 0.06074690390845746, 0.05596094462330597, 0.05150017901643316, 0.04734669617682419, 0.04348326934480494, 0.03989334966522431, 0.03656105859043389, 0.03347117903273066, 0.03060914536379631, 0.02796103235638461, 0.02551354316109877, 0.02325399640858282, 0.02117031252484625, 0.01925099934476045, 0.01748513710602571, 0.0158623629031192, 0.01437285467790573, 0.01300731482073292, 0.01175695345294649, 
0.01061347145885402, 0.009569043332240209, 0.008616299899596919, 0.007748310979278023, 0.006958568032824504, 0.006240966861730892, 0.00558979039994136, 0.004999691649374028, 0.00446567680277801, 0.003983088595231522, 0.00354758992259459, 0.003155147762240465, 0.002802017428410608, 0.002484727191574521, 0.002200063288234454, 0.001945055344702815, 0.001716962235505228, 0.001513258394232603, 0.001331620591890514, 0.001169915195082619, 0.001026185913726367, 0.000898642045443246, 0.00078564722130171, 0.0006857086552278253, 0.0005974668971452761, 0.0005196860877707785, 0.0004512447109806294, 0.0003911268377856041, 0.0003384138542104051, 0.0002922766637749023, 0.0002519683538210645, 0.0002168173136240209, 0.0001862207910693278, 0.0001596388736711369, 0.0001365888788463642, 0.0001166401376457067, 9.940915556994905e-05, 8.455513366488802e-05, 7.177583278484489e-05, 6.080376373676773e-05, 5.140268595718363e-05, 4.33643974249581e-05, 3.650579866561892e-05, 3.066621394917062e-05, 2.570495311383111e-05, 2.149909785376969e-05, 1.794149678046226e-05, 1.493895409551395e-05, 1.24105972886955e-05, 1.028640988972044e-05, 8.505915947511674e-06, 7.017003578341661e-06, 5.774875606723692e-06, 4.741116013255616e-06, 3.882861595307754e-06, 3.172068933627881e-06, 2.584867435407603e-06, 2.100989887170491e-06, 1.703272596072152e-06, 1.377217819785974e-06, 1.110611784436976e-06, 8.93192160745882e-07, 7.163594107461353e-07, 5.729269302525383e-07, 4.569053939680047e-07, 3.633171605202248e-07, 2.880370367082239e-07, 2.276558139813236e-07, 1.793653422623027e-07, 1.408552104706296e-07, 1.102383634352399e-07, 8.597294395993927e-08, 6.680010202632162e-08, 5.169740203309876e-08, 3.983773561778133e-08, 3.055409357377446e-08, 2.331028321473493e-08, 1.76766675190435e-08, 1.331013253681774e-08, 9.937603426601029e-09, 7.342528866456938e-09, 5.35387037244776e-09, 3.836986753490439e-09, 2.690415420945932e-09, 1.815121611318709e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "core_charge_density": [ + 4.781926865736377, 4.78192684929691, 4.781926832441276, 4.781926815158941, 4.781926797439101, 4.78192677927068, 4.781926760642326, 4.781926741542391, 4.78192672195894, 4.781926701879731, 4.781926681292214, 4.781926660183522, 4.781926638540461, 4.781926616349505, 4.78192659359678, 4.781926570268068, 4.781926546348786, 4.781926521823987, 4.781926496678338, 4.781926470896125, 4.781926444461232, 4.781926417357135, 4.781926389566896, 4.781926361073145, 4.78192633185807, 4.781926301903413, 4.781926271190448, 4.781926239699983, 4.781926207412333, 4.781926174307318, 4.781926140364244, 4.781926105561898, 4.781926069878526, 4.781926033291827, 4.781925995778932, 4.781925957316391, 4.781925917880169, 4.781925877445614, 4.781925835987453, 4.781925793479775, 4.781925749896008, 4.781925705208915, 4.781925659390563, 4.781925612412315, 4.781925564244807, 4.781925514857932, 4.781925464220824, 4.781925412301831, 4.781925359068503, 4.781925304487568, 4.78192524852491, 4.781925191145551, 4.78192513231363, 4.78192507199237, 4.78192501014407, 4.781924946730075, 4.781924881710747, 4.781924815045448, 4.781924746692511, 4.781924676609211, 4.781924604751744, 4.781924531075199, 4.781924455533524, 4.781924378079504, 
4.781924298664725, 4.781924217239554, 4.781924133753096, 4.78192404815317, 4.781923960386273, 4.781923870397547, 4.781923778130749, 4.781923683528206, 4.781923586530789, 4.781923487077874, 4.781923385107296, 4.781923280555325, 4.781923173356611, 4.781923063444149, 4.781922950749243, 4.781922835201454, 4.781922716728561, 4.781922595256515, 4.781922470709392, 4.781922343009348, 4.781922212076564, 4.781922077829205, 4.78192194018336, 4.781921799052999, 4.781921654349907, 4.781921505983646, 4.781921353861478, 4.781921197888322, 4.781921037966692, 4.781920873996632, 4.781920705875656, 4.781920533498681, 4.781920356757969, 4.781920175543051, 4.781919989740659, 4.781919799234667, 4.781919603905998, 4.781919403632566, 4.781919198289197, 4.781918987747545, 4.781918771876013, 4.781918550539675, 4.781918323600192, 4.781918090915718, 4.781917852340817, 4.781917607726376, 4.781917356919503, 4.781917099763434, 4.781916836097443, 4.781916565756726, 4.781916288572316, 4.781916004370964, 4.781915712975036, 4.7819154142024, 4.781915107866317, 4.781914793775316, 4.781914471733082, 4.781914141538332, 4.781913802984679, 4.781913455860524, 4.781913099948901, 4.781912735027356, 4.781912360867804, 4.781911977236383, 4.781911583893317, 4.781911180592751, 4.781910767082615, 4.781910343104449, 4.781909908393261, 4.781909462677343, 4.781909005678108, 4.781908537109925, 4.781908056679923, 4.78190756408782, 4.781907059025736, 4.781906541177991, 4.781906010220919, 4.781905465822658, 4.781904907642947, 4.781904335332907, 4.781903748534833, 4.781903146881958, 4.781902529998238, 4.781901897498102, 4.781901248986229, 4.781900584057277, 4.781899902295651, 4.781899203275237, 4.781898486559128, 4.781897751699362, 4.781896998236636, 4.781896225700017, 4.781895433606652, 4.781894621461468, 4.781893788756856, 4.781892934972357, 4.781892059574338, 4.781891162015659, 4.781890241735326, 4.781889298158145, 4.781888330694363, 4.781887338739296, 4.781886321672953, 4.781885278859649, 4.781884209647606, 
4.781883113368549, 4.781881989337282, 4.781880836851267, 4.781879655190183, 4.781878443615468, 4.781877201369872, 4.781875927676972, 4.781874621740688, 4.781873282744794, 4.781871909852396, 4.781870502205418, 4.781869058924063, 4.78186757910626, 4.781866061827108, 4.781864506138286, 4.781862911067472, 4.781861275617727, 4.781859598766886, 4.781857879466892, 4.781856116643174, 4.781854309193948, 4.781852455989545, 4.7818505558717, 4.781848607652826, 4.781846610115275, 4.781844562010575, 4.781842462058654, 4.78184030894703, 4.781838101330005, 4.78183583782781, 4.781833517025754, 4.781831137473335, 4.781828697683331, 4.781826196130876, 4.781823631252504, 4.781821001445171, 4.781818305065259, 4.781815540427543, 4.781812705804135, 4.781809799423415, 4.781806819468915, 4.78180376407819, 4.781800631341648, 4.781797419301359, 4.781794125949839, 4.78179074922878, 4.781787287027779, 4.781783737183013, 4.781780097475884, 4.781776365631637, 4.781772539317942, 4.781768616143424, 4.781764593656185, 4.781760469342262, 4.781756240624054, 4.781751904858721, 4.781747459336523, 4.781742901279137, 4.781738227837913, 4.781733436092097, 4.781728523047003, 4.781723485632152, 4.78171832069934, 4.781713025020681, 4.78170759528659, 4.781702028103705, 4.781696319992785, 4.781690467386515, 4.7816844666273, 4.781678313964958, 4.781672005554394, 4.78166553745319, 4.781658905619145, 4.781652105907749, 4.781645134069594, 4.781637985747718, 4.781630656474886, 4.781623141670798, 4.781615436639234, 4.781607536565105, 4.781599436511462, 4.781591131416409, 4.78158261608993, 4.781573885210664, 4.781564933322571, 4.781555754831523, 4.781546344001818, 4.781536694952593, 4.78152680165415, 4.781516657924196, 4.781506257423977, 4.781495593654321, 4.78148465995158, 4.781473449483474, 4.781461955244808, 4.78145017005312, 4.781438086544179, 4.781425697167397, 4.781412994181114, 4.781399969647752, 4.781386615428877, 4.781372923180106, 4.781358884345906, 4.78134449015424, 4.781329731611106, 4.781314599494915, 
4.781299084350735, 4.781283176484387, 4.781266865956398, 4.7812501425758, 4.781232995893758, 4.781215415197062, 4.781197389501433, 4.781178907544668, 4.781159957779616, 4.781140528366969, 4.78112060716788, 4.781100181736377, 4.781079239311607, 4.781057766809518, 4.781035750816047, 4.781013177577432, 4.780990032990204, 4.780966302596349, 4.780941971571945, 4.780917024716691, 4.780891446446485, 4.78086522078303, 4.780838331344151, 4.780810761332955, 4.780782493527529, 4.780753510270106, 4.780723793457247, 4.780693324527228, 4.780662084448284, 4.780630053707714, 4.780597212299377, 4.780563539710876, 4.780529014911401, 4.780493616338431, 4.780457321883888, 4.780420108879873, 4.780381954086502, 4.78034283367521, 4.780302723216439, 4.780261597660922, 4.780219431327521, 4.78017619788502, 4.780131870336588, 4.78008642100312, 4.780039821505227, 4.779992042747205, 4.779943054896065, 4.779892827366265, 4.779841328798396, 4.779788527040203, 4.779734389127111, 4.779678881261608, 4.779621968790914, 4.779563616187927, 4.779503787026862, 4.77944244396114, 4.779379548701138, 4.779315061989254, 4.77924894357622, 4.779181152195324, 4.779111645538078, 4.779040380227463, 4.778967311788446, 4.778892394624276, 4.778815581986018, 4.778736825944326, 4.778656077359671, 4.778573285849469, 4.778488399760829, 4.778401366135729, 4.778312130679168, 4.778220637724512, 4.778126830201185, 4.778030649595775, 4.777932035919597, 4.77783092766885, 4.77772726178751, 4.777620973628323, 4.777511996912203, 4.777400263687476, 4.777285704288437, 4.777168247291422, 4.777047819470219, 4.776924345753249, 4.776797749172927, 4.776667950822338, 4.77653486980403, 4.776398423179979, 4.776258525922056, 4.776115090857448, 4.77596802861629, 4.7758172475762, 4.775662653805384, 4.775504151005022, 4.775341640449375, 4.775175020926079, 4.77500418867233, 4.774829037312583, 4.774649457790978, 4.774465338306632, 4.774276564242748, 4.774083018097494, 4.773884579411614, 4.773681124693554, 4.773472527344451, 4.773258657580365, 
4.77303938235193, 4.772814565263812, 4.772584066490477, 4.772347742690386, 4.772105446918504, 4.771857028536523, 4.771602333120386, 4.771341202365887, 4.771073473992115, 4.770798981642217, 4.770517554782256, 4.770229018596228, 4.769933193880719, 4.76962989693515, 4.769318939449615, 4.769000128390982, 4.768673265884962, 4.768338149096565, 4.767994570106231, 4.767642315784172, 4.767281167661086, 4.766910901795955, 4.766531288640315, 4.766142092899568, 4.765743073390866, 4.76533398289743, 4.764914568019591, 4.764484569022057, 4.764043719677656, 4.763591747107181, 4.763128371615381, 4.762653306523253, 4.762166257996144, 4.7616669248679, 4.761154998460499, 4.760630162399732, 4.760092092426373, 4.759540456203361, 4.7589749131171, 4.758395114075594, 4.757800701300817, 4.757191308117135, 4.756566558733397, 4.755926068021218, 4.755269441287388, 4.754596274041369, 4.75390615175694, 4.75319864962882, 4.752473332323084, 4.751729753722393, 4.75096745666493, 4.750185972677682, 4.749384821703393, 4.748563511821166, 4.747721538961004, 4.746858386611941, 4.745973525522898, 4.745066413397836, 4.744136494583009, 4.743183199747941, 4.742205945559108, 4.74120413434601, 4.740177153760467, 4.739124376427895, 4.738045159591055, 4.736938844746247, 4.735804757271175, 4.734642206045037, 4.733450483060392, 4.732228863026664, 4.73097660296493, 4.729692941794539, 4.728377099910754, 4.727028278753405, 4.725645660366797, 4.724228406950283, 4.722775660399606, 4.721286541838831, 4.719760151142929, 4.718195566450331, 4.716591843666067, 4.714948015954789, 4.71326309322385, 4.711536061596323, 4.70976588287363, 4.707951493987977, 4.706091806444308, 4.704185705751525, 4.702232050843554, 4.70022967348917, 4.698177377691409, 4.696073939076017, 4.693918104268932, 4.691708590262964, 4.689444083773329, 4.687123240582226, 4.684744684872429, 4.682307008549751, 4.679808770554429, 4.677248496161712, 4.67462467627117, 4.671935766685268, 4.669180187377116, 4.66635632174723, 4.663462515869879, 4.660497077728668, 
4.65745827644198, 4.654344341477958, 4.651153461859666, 4.647883785360324, 4.644533417689125, 4.641100421667698, 4.63758281639771, 4.633978576419842, 4.630285630864536, 4.626501862595075, 4.62262510734318, 4.618653152837928, 4.614583737928313, 4.610414551700232, 4.606143232588302, 4.601767367483502, 4.597284490837128, 4.592692083762043, 4.58798757313197, 4.58316833067991, 4.578231672096496, 4.57317485612955, 4.56799508368589, 4.562689496936586, 4.557255178427044, 4.551689150193305, 4.545988372886011, 4.540149744903631, 4.534170101536628, 4.52804621412441, 4.521774789226852, 4.51535246781246, 4.508775824465334, 4.502041366613148, 4.495145533778482, 4.488084696856141, 4.480855157419012, 4.473453147055353, 4.465874826740426, 4.458116286245599, 4.45017354358828, 4.442042544526053, 4.433719162098743, 4.425199196222189, 4.416478373337837, 4.407552346122301, 4.398416693261407, 4.389066919293349, 4.379498454525876, 4.369706655032621, 4.359686802733944, 4.349434105567926, 4.338943697757353, 4.328210640178881, 4.317229920840759, 4.305996455475847, 4.294505088256901, 4.282750592641389, 4.270727672353494, 4.258430962511137, 4.24585503090632, 4.232994379447265, 4.219843445771325, 4.206396605037776, 4.192648171910202, 4.17859240273825, 4.164223497949143, 4.149535604659495, 4.13452281951848, 4.119179191793656, 4.103498726711192, 4.087475389062547, 4.07110310709003, 4.05437577666405, 4.037287265765142, 4.019831419284293, 4.002002064155362, 3.983793014833677, 3.965198079135321, 3.946211064451774, 3.926825784354935, 3.907036065607763, 3.88683575559601, 3.866218730196709, 3.845178902099231, 3.823710229594879, 3.801806725851023, 3.779462468685847, 3.756671610859745, 3.733428390899332, 3.709727144469887, 3.685562316311851, 3.66092847275668, 3.635820314837028, 3.61023269200572, 3.584160616477459, 3.557599278206474, 3.530544060512605, 3.50299055636731, 3.474934585350091, 3.446372211284629, 3.417299760562456, 3.387713841160609, 3.35761136235786, 3.326989555152262, 3.295845993380632, 
3.264178615538209, 3.231985747294189, 3.199266124695965, 3.166018918051843, 3.132243756478628, 3.097940753096756, 3.06311053085177, 3.027754248936528, 2.991873629784004, 2.955470986595471, 2.918549251363545, 2.881112003343826, 2.843163497922737, 2.804708695822634, 2.765753292578337, 2.72630374821181, 2.686367317023963, 2.645952077414285, 2.605066961630319, 2.563721785339953, 2.521927276909764, 2.479695106262882, 2.437037913179229, 2.393969334890282, 2.350504032809281, 2.306657718226162, 2.262447176784722, 2.217890291547193, 2.173006064439079, 2.127814635854465, 2.082337302189193, 2.036596531056632, 1.990615973927914, 1.944420475925941, 1.898036082490117, 1.851490042616688, 1.804810808368158, 1.758028030334377, 1.711172548717916, 1.664276379707369, 1.617372696794442, 1.570495806684429, 1.523681119445026, 1.476965112535759, 1.430385288359961, 1.383980124983181, 1.337789019667037, 1.291852224875587, 1.246210776422989, 1.200906413447042, 1.155981489912842, 1.111478877375914, 1.067441858763684, 1.023914012969726, 0.9809390900967607, 0.9385608772319679, 0.8968230546932484, 0.8557690427468013, 0.8154418388661125, 0.7758838456802892, 0.7371366898454228, 0.6992410321676306, 0.6622363694097745, 0.6261608283263392, 0.5910509525927437, 0.5569414834256043, 0.5238651348302122, 0.4918523645591966, 0.4609311420223908, 0.4311267145514805, 0.4024613735927406, 0.3749542225768727, 0.3486209483944261, 0.3234735985874832, 0.2995203665512665, 0.276765387220438, 0.2552085458922962, 0.2348453030091591, 0.2156665378822, 0.1976584144850895, 0.1808022725736494, 0.1650745474933936, 0.1504467221149587, 0.1368853143831066, 0.1243519039726304, 0.1128032015079348, 0.1021911637166105, 0.09246315774415421, 0.08356217765097028, 0.07542812726249118, 0.06800353360458213, 0.06123476337488273, 0.05507148358060911, 0.04946652079069661, 0.04437572170348437, 0.03975781538296561, 0.03557427749271858, 0.03178919683225922, 0.02836914445588777, 0.02528304562927302, 0.02250205485415371, 0.01999943416679275, 
0.01775043489130141, 0.01573218300479894, 0.01392356824769696, 0.01230513708930514, 0.01085898963655852, 0.009568680552049758, 0.008419124026807678, 0.007396502833470884, 0.006488181466727873, 0.005682623360197713, 0.004969312152350846, 0.004338676958661677, 0.003782021592971534, 0.003291457668040558, 0.002859841493493733, 0.002480714678817222, 0.002148248339730982, 0.001857190798135248, 0.001602818658876824, 0.001380891140776342, 0.001187607534659279, 0.001019567657498575, 0.0008737351691545322, 0.0007474036165334803, 0.0006381650692224935, 0.0005438812107308944, 0.0004626567503164404, 0.000392815021929153, 0.0003328756390014156, 0.0002815340765825461, 0.0002376430555922307, 0.0002001956076843599, 0.0001683097033064925, 0.0001412143299479, 0.0001182369122312489, 9.879197036222999e-05, 8.237091845417973e-05, 6.853290934058852e-05, 5.689663763068833e-05, 4.713301790935728e-05, 3.895866009347679e-05, 3.213006899785298e-05, 2.643850010402839e-05, 2.170540833787091e-05, 1.777843132373987e-05, 1.452785307496484e-05, 1.184349838661742e-05, 9.632012304740086e-06, 7.81448294711638e-06, 6.324369638058564e-06, 5.105701789771296e-06, 4.11151721751365e-06, 3.302511591401005e-06, 2.645873631104524e-06, 2.11428303138203e-06, 1.685050882216182e-06, 1.339384283183297e-06, 1.061758874644829e-06, 8.393849586540455e-07, 6.617545331358513e-07, 5.202580637270399e-07, 4.078611833673383e-07, 3.188327358395003e-07, 2.485166764492469e-07, 1.931413209178936e-07, 1.496603020863413e-07, 1.156203626197759e-07, 8.905178958196727e-08, 6.837789209546528e-08, 5.23404444173684e-08, 3.993847123240079e-08, 3.037814686975462e-08, 2.3031922037771e-08, 1.740528657093105e-08, 1.310983011627949e-08, 9.841479675697069e-09, 7.362978015756317e-09, 5.48982425584666e-09, 4.079031119715985e-09, 3.020165659284195e-09, 2.228234636852052e-09, 1.638054746604005e-09, 1.199813721079887e-09, 8.755830724529808e-10, 6.365884756451236e-10, 4.610810945943153e-10, 3.326837635412079e-10, 2.391109594174158e-10, 
1.711818763492609e-10, 1.220624353199593e-10, 8.668540483694429e-11, 6.130853923993658e-11, 4.317923539664254e-11, 3.028106244543507e-11, 2.114296210772206e-11, 1.46962208742323e-11, 1.016770372606342e-11, 7.000495038094913e-12, 4.795165039051853e-12, 3.266490546572947e-12, 2.211704174907164e-12, 1.487305126598777e-12, 9.921995376952557e-13, 6.554991711461721e-13, 4.277314275812491e-13, 2.745442681446119e-13, 1.721892442731083e-13, 1.046364512878425e-13, 5.962788769869437e-14, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ] + } +} \ 
No newline at end of file diff --git a/verification/test15/Li.pz-s-kjpaw_psl.0.2.1.UPF.json b/verification/test15/Li.pz-s-kjpaw_psl.0.2.1.UPF.json new file mode 100644 index 000000000..a7e1a7a86 --- /dev/null +++ b/verification/test15/Li.pz-s-kjpaw_psl.0.2.1.UPF.json @@ -0,0 +1,274 @@ +{ + "pseudo_potential": { + "radial_grid": [ + 0.0003039606551848388, 0.0003077840095563408, 0.0003116554558055274, 0.0003155755988537513, 0.0003195450512313399, 0.000323564433173303, 0.0003276343727162462, 0.0003317555057965038, 0.0003359284763495035, 0.0003401539364103849, 0.000344432546215879, 0.0003487649743074729, 0.00035315189763587, 0.0003575940016667656, 0.0003620919804879526, 0.0003666465369177728, 0.0003712583826149344, 0.0003759282381897108, 0.0003806568333165358, 0.0003854449068480192, 0.0003902932069303915, 0.0003952024911204043, 0.0004001735265037003, 0.0004052070898146716, 0.000410303967557827, 0.0004154649561306832, 0.0004206908619482049, 0.0004259825015688081, 0.0004313407018219494, 0.00043676629993732, 0.0004422601436756639, 0.0004478230914612423, 0.0004534560125159646, 0.0004591597869952057, 0.0004649353061253329, 0.0004707834723429605, 0.0004767051994359582, 0.0004827014126862319, 0.000488773049014301, 0.0004949210571256956, 0.0005011463976591908, 0.0005074500433369106, 0.0005138329791163168, 0.0005202962023441107, 0.0005268407229120723, 0.0005334675634148547, 0.0005401777593097689, 0.0005469723590785753, 0.0005538524243913113, 0.0005608190302721804, 0.0005678732652675246, 0.0005750162316159141, 0.0005822490454203733, 0.0005895728368227735, 0.0005969887501804232, 0.0006044979442448717, 0.000612101592342969, 0.0006198008825602004, 0.0006275970179263258, 0.0006354912166033588, 0.0006434847120759031, 0.000651578753343889, 0.0006597746051177322, 0.0006680735480159464, 0.0006764768787652447, 0.000684985910403153, 0.0006936019724831764, 0.0007023264112825447, 0.0007111605900125694, 0.0007201058890316515, 0.0007291637060609617, 0.0007383354564028389, 
0.0007476225731619345, 0.0007570265074691357, 0.000766548728708312, 0.0007761907247459046, 0.000785954002163411, 0.0007958400864927911, 0.0008058505224548345, 0.0008159868742005277, 0.0008262507255554528, 0.0008366436802672662, 0.0008471673622562861, 0.0008578234158692333, 0.0008686135061361661, 0.0008795393190306405, 0.0008906025617331502, 0.0009018049628978756, 0.0009131482729227894, 0.0009246342642231629, 0.0009362647315085043, 0.0009480414920629891, 0.0009599663860294143, 0.0009720412766967226, 0.0009842680507911488, 0.00099664861877102, 0.001009184915125272, 0.001021878898675714, 0.001034732552883103, 0.00104774788615706, 0.001060926932169889, 0.001074271750174342, 0.001087784425325383, 0.001101467069005994, 0.001115321819157091, 0.00112935084061157, 0.001143556325432578, 0.001157940493256018, 0.001172505591637376, 0.00118725389640291, 0.001202187712005244, 0.001217309371883448, 0.001232621238827644, 0.001248125705348193, 0.001263825194049536, 0.001279722158008721, 0.00129581908115871, 0.001312118478676494, 0.001328622897376097, 0.00134533491610652, 0.001362257146154689, 0.001379392231653479, 0.00139674284999486, 0.001414311712248247, 0.001432101563584113, 0.001450115183702923, 0.001468355387269469, 0.001486825024352667, 0.001505526980870889, 0.001524464179042891, 0.001543639577844415, 0.001563056173470537, 0.001582716999803826, 0.001602625128888395, 0.001622783671409921, 0.001643195777181684, 0.001663864635636739, 0.001684793476326271, 0.001705985569424217, 0.001727444226238242, 0.001749172799727128, 0.001771174685024692, 0.001793453319970274, 0.001816012185645908, 0.001838854806920257, 0.001861984752999367, 0.001885405637984369, 0.001909121121436195, 0.00193313490894738, 0.001957450752721084, 0.001982072452157365, 0.002007003854446853, 0.002032248855171879, 0.002057811398915167, 0.002083695479876187, 0.002109905142495249, 0.002136444482085461, 0.002163317645472629, 0.002190528831643204, 0.002218082292400391, 0.002245982333028489, 0.002274233312965609, 
0.002302839646484844, 0.002331805803384004, 0.002361136309684042, 0.002390835748336233, 0.002420908759938297, 0.002451360043459479, 0.002482194356974779, 0.002513416518408425, 0.002545031406286654, 0.002577043960500023, 0.002609459183075258, 0.002642282138956831, 0.002675517956798387, 0.00270917182976407, 0.00274324901634001, 0.002777754841155936, 0.002812694695817177, 0.002848074039747116, 0.002883898401040211, 0.002920173377325809, 0.002956904636642753, 0.002994097918325039, 0.003031759033898608, 0.00306989386798938, 0.003108508379242775, 0.003147608601254737, 0.003187200643514502, 0.003227290692359231, 0.003267885011940609, 0.003308989945203671, 0.003350611914877862, 0.003392757424480623, 0.003435433059333583, 0.003478645487591501, 0.003522401461284218, 0.003566707817371643, 0.00361157147881205, 0.003656999455643829, 0.003702998846080769, 0.003749576837621222, 0.003796740708171126, 0.003844497827181207, 0.003892855656798484, 0.003941821753032206, 0.00399140376693454, 0.004041609445796033, 0.004092446634356145, 0.004143923276029031, 0.004196047414144666, 0.004248827193205689, 0.004302270860159958, 0.00435638676568916, 0.004411183365513647, 0.00446666922171361, 0.004522853004066978, 0.004579743491404047, 0.004637349572979206, 0.004695680249859912, 0.004754744636333085, 0.004814551961329293, 0.004875111569864741, 0.004936432924501467, 0.004998525606825906, 0.005061399318945982, 0.005125063885007144, 0.005189529252727363, 0.005254805494951495, 0.005320902811225207, 0.005387831529388625, 0.005455602107190143, 0.005524225133920418, 0.005593711332066986, 0.005664071558989694, 0.005735316808617128, 0.005807458213164505, 0.005880507044873054, 0.005954474717771344, 0.006029372789458771, 0.006105212962911393, 0.006182007088310629, 0.006259767164894803, 0.006338505342834085, 0.006418233925128977, 0.006498965369532665, 0.006580712290497594, 0.006663487461146477, 0.00674730381526813, 0.00683217444933842, 0.00691811262456658, 0.007005131768967343, 0.007093245479459058, 
0.007182467523988257, 0.007272811843680924, 0.007364292555020815, 0.0074569239520552, 0.007550720508628324, 0.007645696880642977, 0.007741867908350525, 0.007839248618669702, 0.007937854227534626, 0.008037700142272291, 0.008138801964010012, 0.008241175490113132, 0.008344836716653382, 0.008449801840908315, 0.008556087263892145, 0.008663709592918449, 0.008772685644195104, 0.008883032445451829, 0.008994767238600829, 0.009107907482430856, 0.009222470855335193, 0.009338475258073941, 0.009455938816571034, 0.009574879884746477, 0.00969531704738416, 0.009817269123035764, 0.009940755166961237, 0.01006579447410617, 0.0101924065821167, 0.0103206112743923, 0.01045042858317696, 0.01058187879268932, 0.01071498244229202, 0.01084976032970114, 0.01098623351423579, 0.01112442332010869, 0.01126435133975819, 0.01140603943722201, 0.01154950975155367, 0.01169478470028168, 0.01184188698291232, 0.01199083958447656, 0.01214166577912141, 0.01229438913374667, 0.01244903351168721, 0.01260562307644169, 0.01276418229544822, 0.01292473594390734, 0.01308730910865327, 0.01325192719207377, 0.01341861591607928, 0.01358740132612207, 0.01375830979526585, 0.01393136802830664, 0.01410660306594544, 0.0142840422890134, 0.01446371342275012, 0.01464564454113581, 0.01482986407127784, 0.01501640079785261, 0.01520528386760314, 0.01539654279389337, 0.01559020746131966, 0.01578630813038031, 0.01598487544220382, 0.01618594042333666, 0.01638953449059119, 0.01659568945595465, 0.01680443753155979, 0.01701581133471814, 0.01722984389301647, 0.01744656864947746, 0.01766601946778527, 0.01788823063757671, 0.01811323687979917, 0.01834107335213575, 0.0185717756544988, 0.01880537983459245, 0.0190419223935451, 0.01928144029161283, 0.01952397095395452, 0.01976955227647957, 0.02001822263176932, 0.02027002087507266, 0.02052498635037741, 0.02078315889655782, 0.02104457885359946, 0.02130928706890252, 0.02157732490366418, 0.02184873423934145, 0.02212355748419521, 0.02240183757991658, 0.02268361800833673, 0.02296894279822086, 
0.02325785653214789, 0.02355040435347654, 0.02384663197339907, 0.02414658567808382, 0.02445031233590738, 0.02475785940477797, 0.02506927493955081, 0.02538460759953677, 0.02570390665610557, 0.02602722200038439, 0.02635460415105356, 0.02668610426224019, 0.02702177413151108, 0.02736166620796627, 0.02770583360043419, 0.02805433008577013, 0.02840721011725896, 0.0287645288331235, 0.02912634206514013, 0.02949270634736244, 0.02986367892495491, 0.03023931776313752, 0.03061968155624296, 0.03100482973688783, 0.0313948224852589, 0.03178972073851655, 0.03218958620031629, 0.03259448135045002, 0.03300446945460877, 0.03341961457426792, 0.03383998157669703, 0.03426563614509548, 0.03469664478885555, 0.03513307485395478, 0.03557499453347874, 0.03602247287827644, 0.03647557980774966, 0.03693438612077799, 0.03739896350678124, 0.03786938455692126, 0.03834572277544415, 0.03882805259116567, 0.03931644936910086, 0.03981098942223987, 0.04031175002347223, 0.04081880941766065, 0.0413322468338671, 0.04185214249773244, 0.04237857764401187, 0.04291163452926808, 0.0434513964447239, 0.04399794772927676, 0.04455137378267684, 0.0451117610788709, 0.04567919717951416, 0.04625377074765177, 0.04683557156157273, 0.04742469052883789, 0.04802121970048442, 0.04862525228540914, 0.04923688266493229, 0.04985620640754505, 0.05048332028384223, 0.05111832228164282, 0.05176131162130101, 0.05241238877120922, 0.05307165546349678, 0.05373921470992563, 0.05441517081798614, 0.05509962940719552, 0.05579269742560086, 0.05649448316649006, 0.05720509628531295, 0.05792464781681505, 0.05865325019238717, 0.05939101725763283, 0.06013806429015695, 0.06089450801757826, 0.0616604666357681, 0.06243605982731901, 0.06322140878024511, 0.06401663620691807, 0.06482186636324122, 0.06563722506806469, 0.06646283972284506, 0.06729883933155183, 0.0681453545208247, 0.06900251756038427, 0.06987046238369927, 0.07074932460891437, 0.07163924156004021, 0.072540352288411, 0.07345279759441133, 0.07437672004947661, 0.0753122640183703, 
0.07625957568174084, 0.07721880305896296, 0.07819009603126593, 0.07917360636515271, 0.08016948773611406, 0.08117789575264027, 0.08219898798053553, 0.08323292396753791, 0.08427986526824882, 0.0853399754693767, 0.0864134202152972, 0.0875003672339358, 0.08860098636297559, 0.08971544957639462, 0.09084393101133754, 0.0919866069953244, 0.09314365607380248, 0.09431525903804434, 0.0955015989533967, 0.09670286118788513, 0.09791923344117764, 0.09915090577391357, 0.1003980706374008, 0.1016609229036864, 0.1029396598960066, 0.104234481419618, 0.1055455897930178, 0.106873189879557, 0.1082174891194499, 0.1095786975621881, 0.1109570278993599, 0.1123526954978848, 0.1137659184336646, 0.1151969175256582, 0.1166459163703851, 0.1181131413768629, 0.1195988218019839, 0.1211031897863372, 0.1226264803904808, 0.124168931631671, 0.1257307845210524, 0.1273122831013164, 0.1289136744848338, 0.1305352088922663, 0.1321771396916643, 0.1338397234380556, 0.1355232199135331, 0.1372278921678463, 0.1389540065595028, 0.1407018327973885, 0.1424716439829089, 0.1442637166526625, 0.1460783308216499, 0.1479157700270266, 0.1497763213724072, 0.1516602755727246, 0.1535679269996554, 0.1554995737276162, 0.1574555175803382, 0.1594360641780281, 0.1614415229851209, 0.1634722073586355, 0.1655284345971366, 0.1676105259903136, 0.1697188068691831, 0.1718536066569214, 0.1740152589203388, 0.1762041014219992, 0.1784204761729968, 0.1806647294863958, 0.1829372120313422, 0.1852382788878572, 0.1875682896023187, 0.189927608243641, 0.1923166034601622, 0.1947356485372453, 0.1971851214556052, 0.1996654049503686, 0.2021768865708778, 0.204719958741246, 0.2072950188216734, 0.2099024691705368, 0.2125427172072579, 0.215216175475964, 0.2179232617099491, 0.2206643988969448, 0.2234400153452132, 0.2262505447504716, 0.2290964262636574, 0.2319781045595474, 0.2348960299062379, 0.237850658235501, 0.2408424512140242, 0.2438718763155473, 0.246939406893906, 0.250045522256994, 0.2531907077416563, 0.2563754547895237, 0.2596002610238016, 
0.2628656303270256, 0.2661720729197924, 0.2695201054404843, 0.2729102510259941, 0.2763430393934668, 0.2798190069230692, 0.2833386967417995, 0.2869026588083528, 0.2905114499990528, 0.2941656341948651, 0.2978657823695053, 0.3016124726786533, 0.3054062905502927, 0.3092478287761845, 0.313137687604492, 0.3170764748335714, 0.3210648059069407, 0.3251033040094444, 0.3291926001646274, 0.3333333333333333, 0.3375261505135449, 0.3417717068414765, 0.3460706656939418, 0.3504236987920082, 0.3548314863059531, 0.3592947169615439, 0.3638140881476506, 0.3683903060252161, 0.3730240856375938, 0.3777161510222755, 0.3824672353240242, 0.3872780809094278, 0.392149439482897, 0.3970820722041197, 0.4020767498069935, 0.4071342527200567, 0.4122553711884284, 0.4174409053972883, 0.4226916655969068, 0.4280084722292471, 0.4333921560561638, 0.4388435582892073, 0.4443635307210678, 0.449952935858668, 0.4556126470579321, 0.4613435486602506, 0.4671465361306579, 0.4730225161977527, 0.4789724069953751, 0.4849971382060671, 0.4910976512063386, 0.4972748992137569, 0.5035298474358911, 0.5098634732211266, 0.5162767662113777, 0.522770728496723, 0.5293463747719813, 0.5360047324952613, 0.5427468420485037, 0.5495737569000427, 0.5564865437692144, 0.5634862827930306, 0.5705740676949551, 0.5777510059557988, 0.5850182189867662, 0.5923768423046796, 0.5998280257094031, 0.6073729334635033, 0.615012744474167, 0.6227486524774074, 0.63058186622459, 0.6385136096712989, 0.6465451221685851, 0.6546776586566162, 0.662912489860764, 0.6712509024901591, 0.6796941994387385, 0.6882436999888292, 0.6969007400172856, 0.705666672204225, 0.7145428662443866, 0.7235307090611477, 0.7326316050232343, 0.7418469761641564, 0.7511782624044029, 0.7606269217764347, 0.7701944306525016, 0.7798822839753307, 0.7896919954917139, 0.7996250979890327, 0.8096831435347629, 0.8198677037189835, 0.830180369899944, 0.8406227534527165, 0.8511964860209756, 0.861903219771949, 0.8727446276545673, 0.8837224036608693, 0.8948382630906907, 0.9060939428196817, 
0.9174912015707024, 0.9290318201886172, 0.9407176019185665, 0.9525503726877219, 0.964531981390587, 0.9766643001779016, 0.9889492247491561, 1.001388674648811, 1.013984593566222, 1.026738949639344, 1.039653735762265, 1.05273096989659, 1.065972695386758, 1.079380981279321, 1.092957922646224, 1.106705640912184, 1.120626284186143, 1.134722027596939, 1.148995073633159, 1.163447652487281, 1.178082022404158, 1.19290047003386, 1.20790531078898, 1.223098889206416, 1.238483579313701, 1.254061784999972, 1.269835940391554, 1.285808510232324, 1.301981990268818, 1.318358907640192, 1.33494182127311, 1.351733322281559, 1.368736034371737, 1.385952614252004, 1.403385752047987, 1.421038171722941, 1.438912631503348, 1.457011924309919, 1.475338878193979, 1.493896356779355, 1.512687259709836, 1.53171452310223, 1.550981120005156, 1.570490060863581, 1.590244393989201, 1.610247206036761, 1.630501622486335, 1.651010808131704, 1.671777967574854, 1.692806345726694, 1.714099228314097, 1.735659942393283, 1.757491856869697, 1.779598383024393, 1.801982975047056, 1.824649130575735, 1.847600391243341, 1.870840343231052, 1.894372617828649, 1.91820089200191, 1.94232888896716, 1.966760378773006, 1.991499178889441, 2.016549154804317, 2.041914220627329, 2.067598339701624, 2.093605525223055, 2.119939840867276, 2.146605401424688, 2.173606373443371, 2.200946975880145, 2.228631480759757, 2.256664213842424, 2.285049555299727, 2.313791940399012, 2.342895860196434, 2.372365862238666, 2.402206551273488, 2.432422589969279, 2.46301869964355, 2.493999661000695, 2.52537031487895, 2.557135563006809, 2.589300368768926, 2.621869757981631, 2.65484881967826, 2.688242706904292, 2.722056637522549, 2.756295895028491, 2.790965829375755, 2.826071857812127, 2.861619465725965, 2.897614207503334, 2.934061707395871, 2.970967660399579, 3.00833783314471, 3.046178064796776, 3.084494267968963, 3.123292429645979, 3.162578612119509, 3.202358953935504, 3.242639670853296, 3.28342705681686, 3.324727484938242, 3.366547408493351, 
3.408893361930337, 3.451771961890565, 3.495189908242523, 3.53915398512867, 3.583671062025452, 3.628748094816717, 3.674392126880535, 3.720610290189788, 3.76740980642653, 3.814797988110374, 3.862782239741134, 3.911370058955738, 3.960569037699798, 4.010386863413847, 4.060831320234491, 4.111910290210756, 4.163631754535604, 4.216003794793074, 4.269034594221014, 4.322732438989711, 4.377105719496677, 4.432162931677596, 4.487912678333903, 4.544363670476922, 4.601524728689031, 4.659404784501882, 4.718012881791936, 4.77735817819365, 4.837449946530309, 4.898297576262981, 4.959910574957616, 5.022298569770597, 5.08547130895306, 5.149438663374009, 5.214210628062724, 5.279797323770477, 5.346208998551892, 5.413456029366286, 5.48154892369902, 5.550498321203381, 5.620314995363016, 5.691009855175287, 5.762593946855888, 5.835078455564763, 5.908474707153881, 5.982794169936885, 6.058048456481022, 6.134249325421691, 6.211408683299672, 6.289538586421628, 6.36865124274389, 6.448759013779939, 6.529874416532007, 6.612010125446777, 6.69517897439589, 6.779393958681209, 6.864668237065345, 6.951015133827812, 7.038448140846875, 7.126980919707781, 7.216627303837359, 7.30740130066549, 7.399317093813888, 7.492389045312208, 7.586631697842243, 7.6820597770102, 7.778688193647574, 7.876532046141083, 7.975606622791717, 8.075927404203659, 8.177510065703125, 8.280370479787615, 8.384524718606132, 8.489989056470366, 8.596779972397687, 8.704914152685998, 8.814408493520935, 8.925280103616052, 9.03754630688597, 9.151224645153366, 9.266332880889896, 9.382888999991552, 9.500911214589106, 9.620417965893678, 9.741427927078314, 9.86396000619566, 9.988033349132342, 10.11366734260065, 10.24088161716764, 10.36969605032255, 10.50013076958266, 10.63220615563817, 10.76594284553691, 10.90136173590872, 11.03848398623077, 11.17733102213366, 11.31792453874918, 11.46028650410041, 11.60443916253402, 11.75040503819618, 11.89820693855197, 12.04786795794898, 12.19941148122601, 12.35286118736686, 12.50824105320033, 
12.6655753571466, 12.82488868301071, 12.98620592382401, 13.14955228573352, 13.31495329194065, 13.48243478668915, 13.65202293930327, 13.82374424827694, 13.997625545414, 14.17369400002093, 14.35197712315199, 14.53250277190784, 14.71529915378843, 14.90039483110029, 15.08781872541959, 15.27760012211117, 15.46976867490432, 15.66435441052645, 15.86138773339457, 16.06089943036627, 16.26292067555015, 16.46748303517673, 16.67461847253094, 16.88435935294622, 17.0967384488619, 17.31178894494383, 17.52954444326948, 17.75003896857856, 17.97330697358919, 18.19938334438141, 18.42830340584816, 18.66010292721472, 18.89481812762796, 19.13248568181541, 19.373142725816, 19.61682686278249, 19.863576168857, 20.11342919912068, 20.36642499361779, 20.62260308345596, 20.88200349698297, 21.14466676604112, 21.41063393230065, 21.67994655367234, 21.95264671080123, 22.22877701364174, 22.50838060811538, 22.79150118285274, 23.07818297601955, 23.36847078222928, 23.66240995954231, 23.96004643655303, 24.26142671956658, 24.56659789986528, 24.87560766106697, 25.18850428657556, 25.5053366671253, 25.82615430842032, 26.15100733886972, 26.47994651742044, 26.81302324148844, 27.15028955498938, 27.49179815647104, 27.83760240734709, 28.18775634023534, 28.54231466740022, 28.90133278930163, 29.26486680325164, 29.63297351217944, 30.00571043350727, 30.38313580813748, 30.76530860955263, 31.15228855303058, 31.54413610497472, 31.94091249236229, 32.34267971231115, 32.74950054176669, 33.16143854731131, 33.57855809509638, 34.00092436089989, 34.42860334031028, 34.86166185903807, 35.30016758335801, 35.74418903068152, 36.1937955802632, 36.6490574840412, 37.11004587761412, 37.57683279135644, 38.04949116167301, 38.52809484239589, 39.01271861632397, 39.50343820690778, 40.00033029008186, 40.50347250624499, 41.01294347239218, 41.52882279439849, 42.05119107945739, 42.58012994867634, 43.11572204982966, 43.65805107027287, 44.20720175001889, 44.76325989497851, 45.32631239036837, 45.89644721428624, 46.47375345145839, 
47.058321307159, 47.65024212130467, 48.24960838272705, 48.85651374362373, 49.4710530341922, 50.09332227744699, 50.72341870422321, 51.36144076836964, 52.0074881621317, 52.66166183172916, 53.3240639931289, 53.99479814801612, 54.67396909996733, 55.36168297082537, 56.05804721728182, 56.76317064766705, 57.47716343895144, 58.20013715396149, 58.93220475881082, 59.67348064055222, 60.42408062505046, 61.18412199508, 61.95372350865144, 62.73300541756683, 63.52208948621, 64.3210990105722, 65.13015883751699, 65.94939538428858, 66.77893665826394, 67.61891227695496, 68.46945348826112, 69.33069319097687, 70.20276595555825, 71.08580804514868, 71.97995743687137, 72.88535384338806, 73.80213873472906, 74.73045536039909, 75.67044877175937, 76.62226584469295, 77.5860553025541, 78.5619677394062, 79.55015564355328, 80.55077342136559, 81.56397742140679, 82.58992595886345, 83.62877934028153, 84.68069988861576, 85.74585196859165, 86.82440201238893, 87.91651854564641, 89.02237221379404, 90.14213580871767, 91.27598429575686, 92.42409484104466, 93.58664683918984, 94.76382194130697, 95.95580408340112, 97.16277951510672, 98.38493682879077, 99.6224669890202 + ], + "paw_data": { + "aug_integrals": [ + 0.3960392600307058, -0.008987255564312612, 0.0, 0.0, -0.008987255564312612, -0.0237558927567202, 0.0, 0.0, 0.0, 0.0, 0.01082879407556624, 0.01916025224445008, 0.0, 0.0, 0.01916025224445008, 0.03237927822587573 + ], + "occupations": [ + 2.0, 1.0, 0.0, 0.0 + ], + "ae_core_charge_density": [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "ae_wfc": [ + { + "index": 0, + "radial_function": [ + 0.002782716954089031, 0.00281771078328425, 0.002853144674513066, 0.002889024161739548, 0.002925354490280263, 0.002962140564859155, 0.002999388289964484, 0.003037103410899037, 0.003075291782813982, 0.003113959334043129, 0.003153112066836251, 0.003192756058287974, 0.003232897461263684, 0.003273542505336352, 0.00331469749773477, 0.003356368824303283, 0.00339856295047317, 0.003441286422245793, 0.003484545867187658, 0.003528347995437531, 0.003572699600725735, 0.003617607561405789, 0.003663078841498512, 0.003709120491748752, 0.003755739650694876, 0.003802943545751172, 0.003850739494303315, 0.003899134904817039, 0.003948137277960178, 0.003997754207738217, 0.004047993382643524, 0.004098862586818398, 0.004150369701232114, 0.004202522704872113, 0.004255329675949496, 0.004308798793118991, 0.004362938336713554, 0.004417756689993782, 0.004473262340412276, 0.004529463880893169, 0.004586370011126943, 0.004643989538880744, 0.004702331381324345, 0.004761404566371957, 0.004821218234040046, 0.004881781637821333, 0.00494310414607519, 0.005005195243434579, 0.00506806453222973, 0.005131721733928764, 0.005196176690595419, 0.005261439366364092, 0.005327519848932376, 0.005394428351071294, 0.005462175212153426, 0.005530770899699105, 0.005600226010940926, 0.005670551274406715, 0.005741757551521205, 0.005813855838226595, 0.005886857266622213, 0.005960773106623489, 
0.006035614767640442, 0.006111393800275895, 0.006188121898043637, 0.006265810899106748, 0.006344472788036282, 0.006424119697590567, 0.006504763910515301, 0.006586417861364698, 0.006669094138343898, 0.00675280548517285, 0.00683756480297193, 0.006923385152169496, 0.007010279754431619, 0.007098261994614241, 0.007187345422737963, 0.00727754375598572, 0.007368870880723595, 0.007461340854544979, 0.007554967908338345, 0.007649766448378872, 0.007745751058444173, 0.007842936501954355, 0.007941337724136677, 0.008040969854215065, 0.008141848207624711, 0.008243988288252029, 0.008347405790700223, 0.008452116602580727, 0.008558136806830758, 0.00866548268405727, 0.008774170714907553, 0.008884217582466757, 0.008995640174682577, 0.009108455586817412, 0.00922268112392823, 0.009338334303374414, 0.00945543285735389, 0.009573994735467776, 0.009694038107313833, 0.009815581365109025, 0.009938643126341414, 0.01006324223645173, 0.01018939777154484, 0.01031712904113142, 0.0104464555909001, 0.01057739720552047, 0.01070997391147698, 0.01084420597993436, 0.0109801139296345, 0.01111771852982539, 0.01125704080322216, 0.01139810202900067, 0.01154092374582388, 0.01168552775490124, 0.01183193612308157, 0.01198017118597943, 0.01213025555113563, 0.01228221210121179, 0.01243606399721964, 0.01259183468178496, 0.01274954788244683, 0.01290922761499214, 0.01307089818682593, 0.01323458420037773, 0.01340031055654409, 0.0135681024581679, 0.01373798541355444, 0.0139099852400247, 0.01408412806750615, 0.01426044034216131, 0.01443894883005436, 0.01461968062085612, 0.01480266313158764, 0.01498792411040278, 0.01517549164040998, 0.01536539414353351, 0.0155576603844146, 0.0157523194743525, 0.01594940087528608, 0.01614893440381589, 0.01635095023526725, 0.0165554789077945, 0.01676255132652664, 0.01697219876775483, 0.01718445288316179, 0.01739934570409348, 0.01761690964587332, 0.01783717751215921, 0.01806018249934351, 0.01828595820099642, 0.01851453861235279, 0.01874595813484276, 0.0189802515806664, 0.0192174541774126, 
0.01945760157272244, 0.01970072983899727, 0.01994687547815166, 0.02019607542641155, 0.02044836705915769, 0.02070378819581465, 0.02096237710478553, 0.02122417250843261, 0.02148921358810413, 0.02175753998920724, 0.02202919182632756, 0.02230420968839519, 0.0225826346438975, 0.02286450824613886, 0.02314987253854735, 0.02343877006002859, 0.02373124385036688, 0.02402733745567361, 0.02432709493388322, 0.02463056086029659, 0.02493778033317212, 0.02524879897936437, 0.02556366296001044, 0.02588241897626409, 0.02620511427507762, 0.02653179665503146, 0.02686251447221153, 0.02719731664613434, 0.02753625266571973, 0.0278793725953113, 0.02822672708074432, 0.0285783673554612, 0.02893434524667424, 0.02929471318157569, 0.02965952419359487, 0.03002883192870224, 0.03040269065176023, 0.03078115525292066, 0.0311642812540684, 0.03155212481531124, 0.03194474274151549, 0.03234219248888713, 0.03274453217159826, 0.03315182056845826, 0.03356411712962964, 0.03398148198338788, 0.03440397594292501, 0.03483166051319646, 0.03526459789781069, 0.03570285100596104, 0.03614648345939937, 0.03659555959945074, 0.03705014449406889, 0.03751030394493136, 0.03797610449457411, 0.03844761343356447, 0.03892489880771206, 0.0394080294253167, 0.03989707486445244, 0.04039210548028702, 0.04089319241243577, 0.04140040759234898, 0.04191382375073172, 0.04243351442499532, 0.04295955396673908, 0.04349201754926128, 0.04403098117509847, 0.04457652168359133, 0.04512871675847641, 0.045687644935502, 0.04625338561006689, 0.04682601904488067, 0.04740562637764383, 0.04799228962874644, 0.04858609170898344, 0.04918711642728512, 0.04979544849846097, 0.05041117355095505, 0.05103437813461094, 0.05166514972844465, 0.05230357674842304, 0.0529497485552459, 0.05360375546212962, 0.05426568874258987, 0.05493564063822145, 0.0556137043664724, 0.05629997412841026, 0.05699454511647788, 0.05769751352223575, 0.0584089765440888, 0.05912903239499407, 0.05985778031014689, 0.06059532055464236, 0.06134175443110886, 0.06209718428731069, 
0.06286171352371615, 0.06363544660102799, 0.06441848904767238, 0.065210947467243, 0.06601292954589631, 0.06682454405969429, 0.06764590088189038, 0.0684771109901549, 0.06931828647373535, 0.07016954054054741, 0.07103098752419212, 0.07190274289089447, 0.07278492324635892, 0.07367764634253658, 0.07458103108429935, 0.07549519753601544, 0.07642026692802144, 0.0773563616629849, 0.07830360532215203, 0.07926212267147478, 0.08023203966761124, 0.08121348346379297, 0.08220658241555352, 0.08321146608631104, 0.08422826525279868, 0.08525711191033591, 0.08629813927793348, 0.08735148180322513, 0.08841727516721856, 0.08949565628885763, 0.09058676332938866, 0.09169073569652239, 0.09280771404838337, 0.09393784029723852, 0.09508125761299599, 0.09623811042646582, 0.09740854443237264, 0.09859270659211185, 0.09979074513623909, 0.1010028095666835, 0.1022290506586746, 0.1034696204623726, 0.1047246723041913, 0.105994360787803, 0.1072788417948146, 0.1085782724851028, 0.1098928112967974, 0.1112226179459003, 0.1125678534255292, 0.1139286800047711, 0.1153052612271359, 0.1166977619085946, 0.1181063481351896, 0.1195311872602042, 0.1209724479008754, 0.122430299934638, 0.1239049144948832, 0.1253964639662185, 0.1269051219792124, 0.128431063404609, 0.1299744643469957, 0.131535502137909, 0.1331143553283603, 0.1347112036807652, 0.1363262281602598, 0.137959610925384, 0.1396115353181167, 0.1412821858532412, 0.1429717482070241, 0.1446804092051881, 0.1464083568101573, 0.1481557801075578, 0.1499228692919499, 0.1517098156517739, 0.153516811553486, 0.1553440504248639, 0.157191726737459, 0.1590600359881734, 0.1609491746799385, 0.1628593403014712, 0.1647907313060843, 0.166743547089527, 0.16871798796683, 0.1707142551481312, 0.1727325507134541, 0.1747730775864158, 0.1768360395068357, 0.1789216410022184, 0.181030087358084, 0.1831615845871171, 0.1853163393971068, 0.1874945591576477, 0.1896964518655737, 0.1919222261090945, 0.1941720910306037, 0.1964462562881295, 0.1987449320153943, 0.2010683287804543, 
0.2034166575428851, 0.2057901296094811, 0.2081889565884359, 0.2106133503419704, 0.2130635229373743, 0.2155396865964264, 0.2180420536431593, 0.2205708364499334, 0.2231262473817836, 0.2257084987390041, 0.2283178026979325, 0.2309543712498995, 0.2336184161383026, 0.2363101487937707, 0.2390297802673781, 0.2417775211618704, 0.2445535815608643, 0.247358170955981, 0.2501914981718748, 0.2530537712891157, 0.2559451975648872, 0.2588659833514584, 0.2618163340123901, 0.2647964538364327, 0.267806545949077, 0.2708468122217152, 0.2739174531783706, 0.2770186678999545, 0.2801506539260103, 0.2833136071538993, 0.2865077217353907, 0.2897331899706099, 0.2929902021993067, 0.2962789466893985, 0.2995996095227493, 0.3029523744781403, 0.3063374229113933, 0.3097549336326026, 0.3132050827804379, 0.3166880436934725, 0.3202039867785017, 0.3237530793758071, 0.3273354856213288, 0.3309513663057069, 0.3346008787301515, 0.3382841765591059, 0.3420014096696625, 0.3457527239976972, 0.3495382613806851, 0.3533581593971628, 0.3572125512028022, 0.3611015653630645, 0.3650253256824003, 0.3689839510299661, 0.3729775551618265, 0.3770062465396152, 0.3810701281456259, 0.385169297294308, 0.3893038454401451, 0.3934738579818913, 0.3976794140631461, 0.4019205863692489, 0.406197440920478, 0.4105100368615366, 0.4148584262473162, 0.4192426538249255, 0.4236627568119792, 0.428118764671139, 0.4326106988809068, 0.4371385727026703, 0.441702390944002, 0.4463021497182225, 0.4509378362002326, 0.455609428378632, 0.4603168948041383, 0.4650601943343271, 0.4698392758747184, 0.4746540781162352, 0.479504529269069, 0.4843905467929861, 0.4893120371241172, 0.4942688953982752, 0.4992610051708514, 0.5042882381333448, 0.5093504538265867, 0.5144474993507232, 0.5195792090720296, 0.5247454043266315, 0.5299458931212166, 0.5351804698308246, 0.5404489148938112, 0.5457509945040876, 0.5510864603007417, 0.5564550490551586, 0.5618564823557597, 0.567290466290491, 0.5727566911271958, 0.5782548309920178, 0.583784543545983, 0.5893454696599246, 
0.5949372330879155, 0.6005594401393858, 0.6062116793501111, 0.6118935211522651, 0.617604517543738, 0.6233442017569348, 0.629112087927273, 0.6349076707616118, 0.6407304252068522, 0.6465798061189592, 0.6524552479326674, 0.6583561643321388, 0.6642819479228553, 0.6702319699050372, 0.6762055797488908, 0.6822021048719966, 0.6882208503191637, 0.6942610984450858, 0.7003221086001437, 0.7064031168197132, 0.7125033355173477, 0.7186219531822142, 0.7247581340811771, 0.7309110179659324, 0.7370797197856062, 0.7432633294052469, 0.7494609113306471, 0.7556715044399468, 0.76189412172248, 0.7681277500253348, 0.774371349808116, 0.7806238549064034, 0.7868841723044137, 0.7931511819173854, 0.7994237363842145, 0.8057006608708838, 0.811980752885233, 0.8182627821036357, 0.8245454902101508, 0.8308275907487306, 0.8371077689890785, 0.8433846818067526, 0.8496569575781283, 0.855923196090833, 0.8621819684702816, 0.8684318171229438, 0.8746712556969842, 0.8808987690609195, 0.8871128133009478, 0.8933118157376065, 0.8994941749624205, 0.9056582608952073, 0.9118024148627114, 0.9179249496992354, 0.9240241498699461, 0.9300982716175298, 0.9361455431328708, 0.9421641647504281, 0.9481523091689857, 0.9541081216984414, 0.960029720533306, 0.9659151970535723, 0.9717626161536094, 0.9775700165997326, 0.983335411417091, 0.9890567883065017, 0.9947321100918558, 1.000359315198704, 1.005936318164618, 1.011461010181915, 1.016931259673303, 1.022344912901013, 1.02769979460993, 1.032993708705248, 1.038224438965147, 1.043389749788931, 1.048487386981111, 1.053515078571816, 1.058470535673949, 1.063351453377431, 1.068155511680886, 1.072880376461047, 1.077523700480168, 1.082083124431664, 1.086556278024186, 1.090940781104272, 1.095234244817723, 1.099434272809748, 1.103538462463942, 1.107544406180081, 1.111449692690671, 1.115251908416172, 1.11894863885873, 1.122537470034235, 1.126015989942449, 1.129381790074905, 1.132632466960233, 1.135765623746492, 1.138778871820054, 1.141669832460521, 1.144436138531087, 1.147075436203725, 
1.149585386718484, 1.15196366817615, 1.154207977363457, 1.156316031609951, 1.158285570675571, 1.160114358667953, 1.161800185988356, 1.163340871305101, 1.164734263553308, 1.165978243959661, 1.167070728090885, 1.168009667924516, 1.16879305394051, 1.169418917232168, 1.169885331634764, 1.170190415870227, 1.170332335706145, 1.170309306127295, 1.170119593517849, 1.169761517852322, 1.169233454893287, 1.168533838393795, 1.16766116230239, 1.166613982968554, 1.16539092134632, 1.163990665193779, 1.162411971266105, 1.160653667499683, 1.158714655184876, 1.156593911124871, 1.154290489778039, 1.151803525381151, 1.149132234050748, 1.146275915859919, 1.143233956887672, 1.140005831238063, 1.136591103026149, 1.132989428327835, 1.129200557090608, 1.12522433500212, 1.121060705313513, 1.116709710614387, 1.112171494556214, 1.107446303521002, 1.102534488231963, 1.097436505302906, 1.092152918723035, 1.086684401273811, 1.081031735874521, 1.075195816853124, 1.069177651138979, 1.062978359374007, 1.05659917693881, 1.050041454890294, 1.043306660807289, 1.036396379540676, 1.029312313864536, 1.022056285024784, 1.014630233181836, 1.007036217743782, 0.999276417586619, 0.9913531311580714, 0.9832687764615794, 0.9750258909170507, 0.9666271310950186, 0.9580752723208886, 0.9493732081460111, 0.9405239496823765, 0.9315306247978006, 0.9223964771685434, 0.9131248651866013, 0.903719260717426, 0.8941832470265664, 0.8845205149432162, 0.8747348613872253, 0.8648301912744889, 0.8548105141705346, 0.8446799424058908, 0.8344426887454044, 0.8241030638916412, 0.8136654738212841, 0.8031344169533016, 0.7925144811478971, 0.7818103405355045, 0.7710267521753793, 0.7601685525435997, 0.7492406538506233, 0.7382480401888739, 0.7271957635111412, 0.7160889394409548, 0.7049327429164618, 0.6937324036697414, 0.6824932015438855, 0.6712204616506093, 0.6599195493715999, 0.6485958652072671, 0.6372548394770423, 0.6259019268758685, 0.6145426008920282, 0.6031823480919932, 0.5918266622785219, 0.5804810385287869, 0.569150967119906, 
0.557841927349833, 0.5465593812621896, 0.5353087672842489, 0.5240954937879289, 0.5129249325843347, 0.5018024123630755, 0.4907332120882986, 0.4797225543641219, 0.4687755987829087, 0.4578974352706099, 0.447093077444211, 0.4363674559971504, 0.4257254121294227, 0.4151716910399461, 0.404710935499643, 0.3943476795245495, 0.3840863421691225, 0.3739312214607297, 0.3638864884970675, 0.3539561817289328, 0.3441442014513292, 0.3344543045262986, 0.324890099361078, 0.3154550411651461, 0.3061524275094077, 0.2969853942101068, 0.2879569115590331, 0.2790697809201509, 0.2703266317109191, 0.261729918784279, 0.2532819202245788, 0.2449847355676227, 0.2368402844516299, 0.2288503057022588, 0.2210163568510869, 0.2133398140831597, 0.2058218726055535, 0.1984635474254624, 0.1912656745232305, 0.1842289124031042, 0.1773537440023521, 0.1706404789378337, 0.1640892560681068, 0.1577000463487357, 0.1514726559585547, 0.1454067296751852, 0.1395017544790294, 0.1337570633661707, 0.1281718393520159, 0.1227451196490225, 0.117475800003391, 0.1123626391770916, 0.1074042635629868, 0.1025991719220554, 0.09794574023279894, 0.09344222664379612, 0.08908677652106062, 0.0848774275823569, 0.08081211511094663, 0.07688867724139231, 0.07310486031005288, 0.06945832426279122, 0.06594664811219547, 0.0625673354363219, 0.059317819910612, 0.05619547086424664, 0.05319759885178949, 0.05032146123056016, 0.04756426773377823, 0.04492318602914504, 0.04239534725219314, 0.03997785150344197, 0.03766777329816068, 0.03546216695736096, 0.03335807192853002, 0.03135251802456817, 0.02944253056942155, 0.02762513543899727, 0.02589736398611831, 0.02425625783851661, 0.02269887355917488, 0.02122228715870825, 0.0198235984499238, 0.01849993523520555, 0.01724845731794185, 0.01606636032983634, 0.01495087936661895, 0.01389929242539452, 0.01290892363762827, 0.01197714629256485, 0.01110138564670424, 0.01027912151580862, 0.00950789064678274, 0.008785288867651194, 0.008108973014741765, 0.007476662637070715, 0.006886141478805549, 0.006335258741548955, 
0.0058219301290379, 0.005344138677678987, 0.004899935377139804, 0.004487439585981231, 0.004104839248042677, 0.003750390915976883, 0.003422419588969117, 0.003119318372263861, 0.0028395479666574, 0.002581635996594272, 0.002344176185927313, 0.002125827390763242, 0.001925312499117178, 0.00174141720733956, 0.001572988683457271, 0.00141893412768774, 0.001278219240441158, 0.001149866608122779, 0.001032954016986381, 0.0009266127051732982, 0.000830025562901455, 0.0007424252905484189, 0.0006630925241046621, 0.0005913539371615143, 0.0005265803282462763, 0.0004681847019285781, 0.0004156203517013193, 0.0003683789521905884, 0.0003259886677760365, 0.0002880122842105875, 0.0002540453693202501, 0.0002237144683454482, 0.0001966753389586983, 0.0001726112304635633, 0.0001512312111503379, 0.0001322685472583229, 0.0001154791364760257, 0.0001006399984020041, 8.754782389283852e-05, 7.601758474287825e-05, 6.588120467451082e-05, 5.698629216871205e-05, 4.919493523389763e-05, 4.238255779622065e-05, 3.643683699521201e-05, 3.125668028282036e-05, 2.675126084807199e-05, 2.283910951891865e-05, 1.944726092080441e-05, 1.651045128921984e-05, 1.39703649323465e-05, 1.177492587661387e-05, 9.8776311233219e-06, 8.236916256783757e-06, 6.815592465775609e-06, 5.580080621321123e-06, 4.504806879247951e-06, 3.539724597409186e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "1S", + "angular_momentum": 0 + }, + { + "index": 1, + "radial_function": [ + 0.000497454580383377, 0.0005037102797252505, 0.0005100446471019539, 0.0005164586717969174, 0.0005229532914767966, 0.0005295293829028749, 0.0005361880016024333, 0.0005429301746352488, 0.000549756948699868, 0.0005566693835773145, 0.0005636685522621717, 0.0005707555411286323, 0.0005779314500960172, 0.0005851973927962433, 0.0005925544967433325, 0.0006000039035049758, 0.0006075467688761786, 0.00061518426305501, 0.0006229175708204825, 0.0006307478917125847, 0.0006386764402144934, 0.0006467044459369897, 0.0006548331538051055, 0.000663063824247025, 0.0006713977333852677, 0.0006798361732301802, 0.0006883804518757623, 0.0006970318936978544, 0.0007057918395547133, 0.000714661646990005, 0.0007236426904382404, 0.0007327363614326838, 0.0007419440688157603, 0.0007512672389519928, 0.0007607073159434962, 0.0007702657618480583, 0.0007799440568998368, 0.000789743699732702, 0.0007996662076062556, 0.0008097131166345556, 0.0008198859820175778, 0.0008301863782754452, 0.0008406158994854553, 0.0008511761595219404, 0.0008618687922989871, 0.0008726954520160516, 0.0008836578134065024, 0.0008947575719891206, 0.0009059964443225955, 0.000917376168263043, 0.0009288985032245859, 0.0009405652304430274, 0.0009523781532426512, 0.0009643390973061863, 0.0009764499109479664, 0.0009887124653903242, 0.001001128655043254, 0.001013700397787377, 0.001026429635260246, 0.001039318333146033, 0.001052368481468621, 0.001065582094888155, 0.001078961213001076, 0.001092507900643686, 0.001106224248199272, 0.001120112371908838, 0.001134174414185474, 0.001148412543932409, 0.001162828956864783, 
0.001177425875835177, 0.00119220555116295, 0.001207170260967405, 0.001222322311504847, 0.001237664037509558, 0.001253197802538735, 0.001268925999321437, 0.001284851050111576, 0.001300975407044998, 0.001317301552500699, 0.001333831999466215, 0.001350569291907227, 0.001367516005141434, 0.001384674746216732, 0.001402048154293737, 0.001419638901032708, 0.001437449690984912, 0.001455483261988468, 0.001473742385568725, 0.001492229867343216, 0.001510948547431237, 0.001529901300868083, 0.001549091038024019, 0.001568520705027992, 0.001588193284196168, 0.001608111794465319, 0.001628279291831114, 0.001648698869791366, 0.001669373659794276, 0.001690306831691731, 0.001711501594197697, 0.001732961195351756, 0.001754688922987851, 0.001776688105208261, 0.001798962110862888, 0.001821514350033876, 0.001844348274525634, 0.001867467378360302, 0.001890875198278719, 0.00191457531424693, 0.001938571349968297, 0.001962866973401263, 0.001987465897282805, 0.002012371879657659, 0.002037588724413326, 0.002063120281820954, 0.002088970449082116, 0.002115143170881548, 0.002141642439945907, 0.002168472297608584, 0.002195636834380638, 0.002223140190527896, 0.002250986556654272, 0.002279180174291368, 0.002307725336494389, 0.002336626388444442, 0.002365887728057266, 0.002395513806598438, 0.002425509129305126, 0.002455878256014414, 0.002486625801798282, 0.00251775643760526, 0.002549274890908842, 0.002581185946362686, 0.002613494446462659, 0.002646205292215787, 0.002679323443816146, 0.002712853921327764, 0.002746801805374562, 0.002781172237837405, 0.002815970422558302, 0.002851201626051807, 0.002886871178223669, 0.002922984473096788, 0.002959546969544513, 0.002996564192031342, 0.00303404173136106, 0.003071985245432377, 0.003110400460002098, 0.003149293169455878, 0.003188669237586614, 0.0032285345983805, 0.003268895256810808, 0.003309757289639435, 0.003351126846226246, 0.003393010149346268, 0.003435413496014767, 0.003478343258320269, 0.003521805884265521, 0.003565807898616483, 0.003610355903759341, 
0.003655456580565614, 0.003701116689265365, 0.003747343070328557, 0.003794142645354598, 0.003841522417970083, 0.003889489474734787, 0.003938050986055916, 0.003987214207110662, 0.004036986478777078, 0.004087375228573283, 0.004138387971605054, 0.004190032311521792, 0.004242315941480891, 0.004295246645120539, 0.004348832297540945, 0.004403080866294023, 0.00445800041238153, 0.004513599091261675, 0.004569885153864202, 0.004626866947613952, 0.0046845529174629, 0.004742951606930675, 0.004802071659153551, 0.004861921817941912, 0.004922510928846166, 0.004983847940231111, 0.005045941904358735, 0.005108801978479424, 0.005172437425931558, 0.005236857617249489, 0.005302072031279833, 0.005368090256306093, 0.005434921991181535, 0.005502577046470305, 0.005571065345596726, 0.005640396926002744, 0.00571058194031346, 0.005781630657510695, 0.005853553464114525, 0.005926360865372728, 0.006000063486458074, 0.00607467207367337, 0.006150197495664192, 0.006226650744639217, 0.006304042937598076, 0.006382385317566593, 0.006461689254839373, 0.006541966248229559, 0.006623227926325706, 0.006705486048755617, 0.006788752507457028, 0.006873039327955002, 0.006958358670645903, 0.007044722832087792, 0.007132144246297101, 0.007220635486051421, 0.007310209264198223, 0.007400878434969363, 0.007492655995301158, 0.007585555086159847, 0.007679588993872258, 0.007774771151461432, 0.007871115139987018, 0.007968634689890189, 0.008067343682342857, 0.00816725615060091, 0.008268386281361268, 0.008370748416122413, 0.008474357052548181, 0.008579226845834486, 0.008685372610078707, 0.00879280931965136, 0.00890155211056983, 0.009011616281873719, 0.00912301729700155, 0.009235770785168398, 0.009349892542744103, 0.009465398534631684, 0.009582304895645516, 0.009700627931888882, 0.009820384122130467, 0.009941590119179318, 0.01006426275125783, 0.01018841902337228, 0.01031407611868041, 0.01044125139985555, 0.01056996241044669, 0.01070022687623411, 0.01083206270657979, 0.01096548799577224, 0.01110052102436497, 
0.01123718026050803, 0.01137548436127205, 0.01151545217396406, 0.01165710273743432, 0.01180045528337369, 0.01194552923760053, 0.01209234422133663, 0.0122409200524712, 0.0123912767468123, 0.01254343451932472, 0.01269741378535362, 0.01285323516183297, 0.0130109194684779, 0.01317048772896007, 0.0133319611720651, 0.01349536123283108, 0.01366070955366715, 0.01382802798545115, 0.01399733858860517, 0.01416866363414809, 0.01434202560472382, 0.01451744719560413, 0.01469495131566495, 0.0148745610883348, 0.01505629985251417, 0.01524019116346459, 0.01542625879366595, 0.01561452673364081, 0.01580501919274424, 0.0159977605999178, 0.01619277560440611, 0.01639008907643459, 0.01658972610784674, 0.01679171201269937, 0.01699607232781414, 0.01720283281328382, 0.01741201945293131, 0.01762365845471995, 0.01783777625111307, 0.01805439949938098, 0.01827355508185352, 0.01849527010611623, 0.01871957190514798, 0.0189464880373982, 0.01917604628680144, 0.01940827466272719, 0.01964320139986266, 0.01988085495802631, 0.02012126402190977, 0.02036445750074574, 0.02061046452789948, 0.02085931446038135, 0.02111103687827778, 0.02136566158409819, 0.02162321860203499, 0.02188373817713407, 0.02214725077437282, 0.02241378707764291, 0.02268337798863481, 0.02295605462562108, 0.02323184832213527, 0.02351079062554334, 0.02379291329550434, 0.02407824830231708, 0.02436682782514931, 0.02465868425014608, 0.0249538501684137, 0.02525235837387564, 0.02555424186099679, 0.02585953382237221, 0.02616826764617659, 0.02648047691347041, 0.02679619539535894, 0.02711545704999975, 0.02743829601945478, 0.02776474662638246, 0.02809484337056577, 0.0284286209252716, 0.02876611413343686, 0.02910735800367686, 0.02945238770611107, 0.02980123856800155, 0.03015394606919903, 0.03051054583739168, 0.0308710736431515, 0.03123556539477295, 0.03160405713289875, 0.0319765850249272, 0.03235318535919566, 0.03273389453893448, 0.03311874907598573, 0.03350778558428084, 0.03390104077307123, 0.03429855143990594, 0.03470035446335003, 
0.03510648679543757, 0.03551698545385276, 0.03593188751383292, 0.03635123009978639, 0.03677505037661916, 0.03720338554076285, 0.03763627281089769, 0.038073749418363, 0.0385158525972484, 0.03896261957415821, 0.03941408755764192, 0.03987029372728316, 0.04033127522243947, 0.04079706913062544, 0.0412677124755312, 0.04174324220466839, 0.04222369517663574, 0.04270910814799584, 0.04319951775975522, 0.04369496052343898, 0.04419547280675201, 0.04470109081881768, 0.04521185059498593, 0.04572778798120147, 0.0462489386179236, 0.04677533792358848, 0.04730702107760493, 0.04784402300287446, 0.04838637834782653, 0.04893412146795935, 0.04948728640687718, 0.05004590687681441, 0.05061001623863681, 0.05117964748131055, 0.05175483320082894, 0.0523356055785873, 0.05292199635919606, 0.05351403682772212, 0.05411175778634865, 0.05471518953044294, 0.0553243618240228, 0.05593930387461082, 0.05656004430746676, 0.05718661113918778, 0.05781903175066614, 0.05845733285939451, 0.05910154049110847, 0.05975167995075601, 0.06040777579278385, 0.06106985179073054, 0.061737930906116, 0.06241203525661747, 0.06309218608352196, 0.06377840371844484, 0.06447070754930509, 0.06516911598554696, 0.06587364642259853, 0.06658431520555744, 0.06730113759209402, 0.06802412771456287, 0.0687532985413132, 0.06948866183718891, 0.07023022812320959, 0.07097800663542353, 0.07173200528292419, 0.07249223060502187, 0.07325868772756229, 0.07403138031838447, 0.07481031054191, 0.07559547901285674, 0.07638688474906949, 0.07718452512346154, 0.07798839581506017, 0.07879849075915062, 0.0796148020965125, 0.08043732012174408, 0.08126603323066905, 0.08210092786682198, 0.08294198846700848, 0.08378919740593689, 0.08464253493991847, 0.08550197914963444, 0.08636750588196765, 0.08723908869089836, 0.08811669877746359, 0.08900030492878033, 0.08988987345613371, 0.09078536813213187, 0.09168675012693013, 0.09259397794352753, 0.09350700735214011, 0.09442579132365589, 0.09535027996217721, 0.09628042043665763, 0.0972161569116408, 
0.09815743047711015, 0.09910417907745933, 0.1000563374395942, 0.101013837000178, 0.1019766058320335, 0.1029445685697156, 0.1039176463342703, 0.1048957566571971, 0.1058788134036317, 0.10686672669477, 0.1078594028295524, 0.1088567442056321, 0.1098586492396495, 0.1108650122868392, 0.111875723559995, 0.1128906690478213, 0.1139097304327016, 0.114932785007914, 0.1159597055943277, 0.1169903604566153, 0.1180246132190168, 0.1190623227806943, 0.1201033432307171, 0.1211475237627197, 0.122194708589276, 0.1232447368560362, 0.1242974425556737, 0.125352654441692, 0.1264101959421444, 0.1274698850733186, 0.1285315343534439, 0.1295949507164784, 0.1306599354260369, 0.1317262839895215, 0.1327937860725208, 0.1338622254135424, 0.1349313797391511, 0.1360010206795812, 0.1370709136848986, 0.1381408179417877, 0.1392104862910429, 0.1402796651458431, 0.1413480944108943, 0.1424155074025248, 0.1434816307698205, 0.144546184416891, 0.1456088814263576, 0.1466694279841594, 0.1477275233057736, 0.1487828595639487, 0.1498351218180536, 0.1508839879451454, 0.1519291285728619, 0.1529702070142474, 0.1540068792046222, 0.155038793640608, 0.1560655913214241, 0.1570869056925715, 0.1581023625920225, 0.1591115801990375, 0.1601141689857316, 0.1611097316715139, 0.162097863180528, 0.1630781506022198, 0.1640501731551625, 0.1650135021542711, 0.1659677009815366, 0.1669123250604164, 0.1678469218340149, 0.1687710307471904, 0.1696841832327271, 0.1705859027017101, 0.1714757045382435, 0.1723530960986508, 0.1732175767153009, 0.174068637705199, 0.1749057623834863, 0.1757284260819906, 0.17653609617297, 0.1773282320981938, 0.1781042854035026, 0.1788636997789905, 0.1796059111049526, 0.1803303475037396, 0.1810364293976611, 0.1817235695730784, 0.182391173250828, 0.1830386381631146, 0.1836653546370113, 0.1842707056847061, 0.1848540671006285, 0.1854148075655918, 0.1859522887580853, 0.1864658654728445, 0.1869548857468331, 0.1874186909927607, 0.1878566161402646, 0.1882679897848783, 0.188652134344908, 0.1890083662263367, 
0.1893359959958723, 0.1896343285622543, 0.1899026633659305, 0.1901402945772127, 0.1903465113030175, 0.1905205978022967, 0.1906618337102557, 0.1907694942714606, 0.1908428505819249, 0.1908811698402711, 0.1908837156080527, 0.1908497480793234, 0.1907785243595352, 0.190669298753844, 0.1905213230648989, 0.1903338469001865, 0.1901061179889995, 0.1898373825090956, 0.1895268854231077, 0.1891738708247648, 0.1887775822949774, 0.1883372632678413, 0.1878521574066046, 0.1873215089896439, 0.1867445633064902, 0.1861205670639391, 0.1854487688022804, 0.1847284193216737, 0.1839587721186946, 0.183139083833072, 0.1822686147046311, 0.1813466290404534, 0.180372395692258, 0.1793451885440068, 0.1782642870097257, 0.177128976541533, 0.1759385491478567, 0.1746923039218172, 0.1733895475797434, 0.1720295950097855, 0.170611769830576, 0.1691354049598843, 0.1675998431932, 0.1660044377921696, 0.1643485530828009, 0.162631565063338, 0.160852862021695, 0.1590118451623274, 0.1571079292423995, 0.1551405432170966, 0.1531091308939086, 0.1510131515956973, 0.1488520808323383, 0.1466254109807076, 0.1443326519727619, 0.1419733319914366, 0.1395469981740612, 0.137053217322963, 0.1344915766229045, 0.1318616843649659, 0.1291631706764546, 0.1263956882563879, 0.1235589131160603, 0.1206525453241686, 0.117676309755928, 0.1146299568455711, 0.1115132633415793, 0.1083260330639484, 0.1050680976627453, 0.1017393173771627, 0.09833958179422864, 0.09486881060627424, 0.0913269543662116, 0.08771399523961596, 0.08402994775255214, 0.0802748595340271, 0.07644881205189201, 0.07255192134095939, 0.06858433872204144, 0.06454625151055708, 0.06043788371334532, 0.0562594967118907, 0.05201138989031973, 0.04769390116078872, 0.04330740764074942, 0.03885232651078314, 0.03432911552006923, 0.02973827358109418, 0.02508034131772657, 0.02035590158597952, 0.0155655799661693, 0.01071004522397607, 0.005790009738530472, 0.0008062298955485608, -0.004240493556458553, -0.00934931518885962, -0.01451934461639541, -0.01974964626338009, 
-0.02503923917444773, -0.03038709687160623, -0.03579214725926921, -0.04125327257882422, -0.0467693094141611, -0.05233904874942605, -0.05796123608008229, -0.06363457157814643, -0.069357710312227, -0.0751292625227179, -0.08094779395219164, -0.08681182623069357, -0.09271983731525767, -0.09867026198254418, -0.1046614923730403, -0.1106918785847638, -0.1167597293138677, -0.1228633125389628, -0.1290008562453527, -0.1351705491847195, -0.1413705416651065, -0.1475989463653299, -0.1538538391672131, -0.1601332599982945, -0.1664352136769181, -0.1727576707508916, -0.1790985683202201, -0.1854558108337941, -0.191827270849385, -0.1982107897458776, -0.2046041783764148, -0.2110052176510472, -0.2174116590376415, -0.2238212249702174, -0.2302316091546157, -0.2366404767624705, -0.2430454645058991, -0.2494441805871656, -0.2558342045198057, -0.2622130868203326, -0.2685783485726385, -0.2749274808705105, -0.2812579441472359, -0.2875671674049604, -0.2938525473601873, -0.3001114475254201, -0.3063411972503083, -0.3125390907486236, -0.3187023861398069, -0.3248283045355949, -0.3309140292032329, -0.3369567048369765, -0.3429534369689423, -0.3489012915489211, -0.3547972947205871, -0.3606384328187133, -0.3664216526086851, -0.3721438617859266, -0.3778019297489795, -0.3833926886560574, -0.3889129347710689, -0.3943594301014886, -0.3997289043271408, -0.4050180570160199, -0.4102235601207331, -0.4153420607470392, -0.4203701841842549, -0.425304537185993, -0.4301417114887398, -0.4348782875551354, -0.4395108385284411, -0.4440359343845106, -0.4484501462675916, -0.4527500509964119, -0.4569322357272353, -0.4609933027608495, -0.4649298744807683, -0.4687385984102571, -0.4724161523761104, -0.4759592497674215, -0.4793646448778577, -0.482629138320209, -0.4857495825021919, -0.4887228871526712, -0.4915460248876087, -0.4942160368051624, -0.4967300380994446, -0.4990852236824982, -0.5012788738040896, -0.5033083596589163, -0.5051711489708292, -0.5068648115436347, -0.5083870247680132, -0.5097355790740392, 
-0.5109083833187351, -0.511903470098031, -0.5127190009724424, -0.5133532715957078, -0.5138047167355715, -0.514071915175829, -0.5141535944886945, -0.5140486356664957, -0.5137560776016519, -0.5132751214038427, -0.512605134543246, -0.5117456548086897, -0.5106963940695471, -0.5094572418301954, -0.508028268565863, -0.5064097288287068, -0.5046020641129925, -0.5026059054682966, -0.5004220758497177, -0.4980515921941605, -0.4954956672118727, -0.4927557108825315, -0.4898333316453413, -0.4867303372727807, -0.4834487354178548, -0.4799907338249547, -0.4763587401947137, -0.4725553616935744, -0.4685834040991489, -0.4644458705728742, -0.4601459600519292, -0.4556870652529013, -0.4510727702802708, -0.4463068478334196, -0.4413932560065748, -0.43633613467687, -0.4311398014765479, -0.4258087473462416, -0.4203476316672629, -0.4147612769718873, -0.4090546632317749, -0.4032329217258792, -0.3973013284905031, -0.3912652973555308, -0.3851303725723192, -0.3789022210402537, -0.3725866241405668, -0.366189469187675, -0.3597167404963741, -0.3531745101483091, -0.3465689283310007, -0.3399062134118212, -0.3331926416924003, -0.3264345368796034, -0.3196382592961174, -0.3128101948555503, -0.3059567438288026, -0.2990843094302745, -0.2921992862542285, -0.2853080485933035, -0.2784169386727746, -0.2715322548356339, -0.2646602397149378, -0.2578070684310869, -0.2509788368527818, -0.2441815499612898, -0.2374211103583774, -0.2307033069587687, -0.2240338039082907, -0.2174181297689351, -0.2108616670119035, -0.2043696418592857, -0.1979471145143655, -0.1915989698196214, -0.1853299083803147, -0.1791444381901236, -0.1730468667935596, -0.1670412940180407, -0.1611316053061411, -0.1553214656762782, -0.149614314337398, -0.1440133599803114, -0.1385215767653309, -0.1331417010226376, -0.1278762286784391, -0.1227274134164862, -0.1176972655809234, -0.1127875518227794, -0.1079997954886896, -0.103335277746711, -0.09879503944036255, -0.09437988365834526, -0.09009037900377674, -0.08592686354325973, -0.08188944941270891, 
-0.07797802805361975, -0.0741922760503988, -0.07053166153651155, -0.06699545113456418, -0.06358271739303872, -0.06029234668026443, -0.0571230474943469, -0.05407335914620341, -0.05114166077157652, -0.04832618062692547, -0.04562500562343039, -0.04303609105298739, -0.04055727046002326, -0.03818626561320895, -0.0359206965316971, -0.03375809152133732, -0.03169589717742344, -0.02973148831188332, -0.02786217776441828, -0.02608522605891617, -0.02439785086848022, -0.02279723625461364, -0.02128054164845402, -0.0198449105444408, -0.01848747887939697, -0.01720538307269203, -0.01599576770590034, -0.01485579282315571, -0.0137826408362062, -0.01277352302096904, -0.01182568559515474, -0.01093641536925097, -0.01010304496581119, -0.009322957604563746, -0.008593591453328036, -0.007912443547080457, -0.007277073279742562, -0.006685105475355373, -0.006134233047248488, -0.005622219255602828, -0.005146899575435944, -0.004706183188504682, -0.004298054113919253, -0.003920571993394454, -0.003571872548028406, -0.003250167724298767, -0.002953745547603742, -0.002680969702154765, -0.002430278856354509, -0.002200185752973884, -0.001989276083481443, -0.001796207165787711, -0.001619706444428127, -0.001458569831997065, -0.001311659909926095, -0.001177904003561235, -0.001056292298860245, -0.0009458741511910084, -0.0008457665367579705, -0.0007551128232988381, -0.0006731669561342397, -0.0005992381944704354, -0.0005326484608256568, -0.0004727574313272394, -0.0004189714752396455, -0.0003707411178464742, -0.0003275585666763199, -0.0002889553055715211, -0.0002544997607233836, -0.0002237950422060982, -0.0001964767639823004, -0.0001722109448079137, -0.0001506919919355045, -0.0001316407690065774, -0.0001148027490364716, -9.994625293293959e-05, -8.686077355295362e-05, -7.535538489335472e-05, -6.525723563089953e-05, -5.641012587703571e-05, -4.867316569301852e-05, -4.191951362215559e-05, -3.603519323813977e-05, -3.091798548144563e-05, -2.647639435921686e-05, -2.262868341731874e-05, -1.930198025541903e-05, 
-1.643144624604425e-05, -1.395950853531488e-05, -1.183515134512248e-05, -1.001326356237526e-05, -8.454039589003725e-06, -7.122430434999134e-06, -5.987642064113041e-06, -5.022678046193669e-06, -4.203923629679446e-06, -3.510768420718218e-06, -2.925264939962717e-06, -2.431820422577969e-06, -2.016919329708844e-06, -1.668874148988379e-06, -1.377602176093166e-06, -1.134426087434088e-06, -9.318962344708535e-07, -7.636327116232294e-07, -6.241853711959125e-07, -5.089100791242858e-07, -4.138596237843883e-07, -3.356878058037665e-07, -2.715653490822151e-07, -2.191063815114888e-07, -1.763043377024151e-07, -1.41476235015473e-07, -1.132143680674624e-07, -9.034455545788945e-08, -7.189015560752506e-08, -5.704114628210737e-08, -4.512763460307972e-08, -3.559723119659518e-08, -2.799578372152907e-08, -2.195102150705885e-08, -1.715871461392218e-08, -1.337099753071534e-08, -1.038655016768089e-08, -8.042367069662322e-09, -6.206880100510954e-09, -4.774230518701712e-09, -3.659513639590201e-09, -2.794843392885836e-09, -2.126105438372677e-09, -1.610285544099596e-09, -1.213279021940967e-09, -9.080863566659195e-10, -6.733773791829873e-10, -4.921647004054244e-10, -3.51876221811061e-10, -2.375975156699715e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 2, + "radial_function": [ + 1.061086659232113e-07, 1.083422345628062e-07, 1.106228194269892e-07, 1.129514101989671e-07, 1.153340718400357e-07, 1.17782682779179e-07, 1.202960331176699e-07, 1.228762077818541e-07, 1.255247985239565e-07, 1.282434362515703e-07, 1.310337942166005e-07, 1.338975889080041e-07, 1.368365811036238e-07, 1.398525769656335e-07, 1.429474291617767e-07, 1.461230380155184e-07, 1.493813526856178e-07, 1.527243723758625e-07, 1.561541475757185e-07, 
1.596727813326643e-07, 1.632824305570015e-07, 1.669853073599522e-07, 1.707836804258722e-07, 1.746798764194352e-07, 1.786762814286582e-07, 1.827753424446681e-07, 1.869795688791238e-07, 1.912915341202393e-07, 1.957138771283713e-07, 2.002493040721621e-07, 2.049005900062541e-07, 2.09670580591615e-07, 2.145621938595421e-07, 2.195784220204404e-07, 2.247223333184953e-07, 2.299970739333909e-07, 2.354058699302535e-07, 2.409520292590288e-07, 2.466389438045322e-07, 2.524700914884457e-07, 2.584490384245602e-07, 2.64579441128603e-07, 2.708650487840188e-07, 2.773097055651073e-07, 2.839173530189591e-07, 2.906920325076637e-07, 2.976378877123049e-07, 3.047591672002931e-07, 3.120602270576249e-07, 3.195455335877024e-07, 3.27219666078381e-07, 3.350873196389604e-07, 3.431533081088771e-07, 3.514225670398953e-07, 3.599001567536474e-07, 3.685912654764128e-07, 3.775012125530767e-07, 3.86635451742259e-07, 3.959995745946513e-07, 4.055993139166518e-07, 4.154405473214433e-07, 4.255293008697082e-07, 4.358717528022358e-07, 4.464742373667273e-07, 4.573432487411679e-07, 4.684854450561899e-07, 4.799076525189154e-07, 4.916168696408275e-07, 5.03620271572283e-07, 5.159252145463464e-07, 5.285392404346924e-07, 5.414700814183888e-07, 5.547256647764506e-07, 5.683141177951192e-07, 5.822437728009028e-07, 5.965231723204824e-07, 6.111610743706762e-07, 6.261664578817216e-07, 6.415485282572286e-07, 6.573167230742349e-07, 6.734807179268805e-07, 6.900504324173097e-07, 7.070360362974947e-07, 7.244479557657747e-07, 7.4229687992199e-07, 7.605937673851952e-07, 7.793498530780336e-07, 7.985766551819539e-07, 8.182859822675587e-07, 8.384899406044815e-07, 8.592009416552949e-07, 8.804317097580704e-07, 9.021952900023232e-07, 9.245050563031936e-07, 9.473747196788383e-07, 9.708183367361302e-07, 9.948503183698914e-07, 1.019485438681016e-06, 1.044738844118969e-06, 1.070626062854295e-06, 1.097163014386898e-06, 1.124366019396004e-06, 1.152251809837868e-06, 1.180837539297436e-06, 1.210140793600326e-06, 1.240179601691649e-06, 
1.270972446788367e-06, 1.302538277812024e-06, 1.334896521108897e-06, 1.368067092464751e-06, 1.402070409421577e-06, 1.436927403903893e-06, 1.472659535162337e-06, 1.509288803042515e-06, 1.546837761587238e-06, 1.585329532980495e-06, 1.624787821841722e-06, 1.665236929879123e-06, 1.706701770911033e-06, 1.749207886264538e-06, 1.792781460560775e-06, 1.8374493378966e-06, 1.883239038432521e-06, 1.930178775397071e-06, 1.978297472518025e-06, 2.027624781891123e-06, 2.078191102297254e-06, 2.130027597979296e-06, 2.183166217890091e-06, 2.23763971542335e-06, 2.29348166863952e-06, 2.350726500999e-06, 2.409409502615363e-06, 2.469566852041575e-06, 2.531235638602502e-06, 2.594453885287357e-06, 2.659260572216051e-06, 2.725695660693771e-06, 2.793800117868447e-06, 2.863615942006169e-06, 2.935186188399939e-06, 3.008554995927558e-06, 3.083767614274842e-06, 3.160870431840721e-06, 3.239911004341249e-06, 3.320938084129892e-06, 3.404001650251981e-06, 3.489152939251587e-06, 3.576444476749549e-06, 3.665930109811874e-06, 3.757665040128153e-06, 3.851705858020172e-06, 3.948110577301353e-06, 4.0469386710082e-06, 4.148251108025423e-06, 4.252110390626971e-06, 4.358580592955724e-06, 4.467727400465176e-06, 4.579618150347021e-06, 4.69432187296911e-06, 4.811909334348869e-06, 4.932453079687914e-06, 5.056027477994166e-06, 5.182708767818466e-06, 5.312575104133345e-06, 5.445706606382265e-06, 5.582185407728359e-06, 5.722095705532398e-06, 5.865523813090462e-06, 6.012558212662498e-06, 6.163289609823762e-06, 6.3178109891719e-06, 6.476217671423208e-06, 6.638607371932474e-06, 6.805080260671592e-06, 6.975739023703074e-06, 7.150688926185363e-06, 7.330037876947868e-06, 7.513896494674475e-06, 7.70237817573528e-06, 7.895599163707244e-06, 8.093678620625479e-06, 8.296738700007855e-06, 8.504904621696704e-06, 8.718304748562415e-06, 8.937070665114838e-06, 9.161337258069525e-06, 9.391242798916969e-06, 9.626929028544145e-06, 9.868541243958942e-06, 1.011622838716921e-05, 1.037014313626942e-05, 1.063044199878926e-05, 
1.089728540735978e-05, 1.117083781775401e-05, 1.145126780936044e-05, 1.173874818814905e-05, 1.203345609219109e-05, 1.233557309979533e-05, 1.26452853403249e-05, 1.296278360776045e-05, 1.32882634770769e-05, 1.36219254235029e-05, 1.396397494473325e-05, 1.431462268616695e-05, 1.467408456924454e-05, 1.504258192296081e-05, 1.542034161863036e-05, 1.580759620798548e-05, 1.620458406468781e-05, 1.661154952933707e-05, 1.702874305806213e-05, 1.745642137478193e-05, 1.789484762722557e-05, 1.834429154680316e-05, 1.880502961242135e-05, 1.927734521833934e-05, 1.976152884616386e-05, 2.025787824108356e-05, 2.076669859244602e-05, 2.128830271878276e-05, 2.182301125739016e-05, 2.237115285857694e-05, 2.293306438469138e-05, 2.35090911140439e-05, 2.40995869498438e-05, 2.470491463427148e-05, 2.532544596781022e-05, 2.596156203396494e-05, 2.661365342949784e-05, 2.72821205003144e-05, 2.796737358313586e-05, 2.866983325309791e-05, 2.938993057741835e-05, 3.012810737527989e-05, 3.088481648407775e-05, 3.16605220321851e-05, 3.245569971839304e-05, 3.327083709818549e-05, 3.4106433877013e-05, 3.496300221073341e-05, 3.584106701339116e-05, 3.674116627251098e-05, 3.766385137208594e-05, 3.86096874234437e-05, 3.957925360417948e-05, 4.057314350534821e-05, 4.159196548711298e-05, 4.263634304305155e-05, 4.370691517332698e-05, 4.480433676693363e-05, 4.59292789932341e-05, 4.708242970300827e-05, 4.826449383923995e-05, 4.947619385787249e-05, 5.071827015876936e-05, 5.199148152712157e-05, 5.329660558554915e-05, 5.463443925714926e-05, 5.60057992397498e-05, 5.741152249163263e-05, 5.885246672899691e-05, 6.032951093543916e-05, 6.184355588373242e-05, 6.339552467019405e-05, 6.49863632619373e-05, 6.661704105730915e-05, 6.828855145982338e-05, 7.000191246590452e-05, 7.175816726676577e-05, 7.355838486475121e-05, 7.540366070447924e-05, 7.72951173191326e-05, 7.923390499224758e-05, 8.12212024353623e-05, 8.325821748189299e-05, 8.534618779761418e-05, 8.748638160812785e-05, 8.968009844371434e-05, 9.192866990196694e-05, 
9.423346042862053e-05, 9.659586811699369e-05, 9.901732552647278e-05, 0.0001014993005204757, 0.0001040432971243428, 0.0001066508564036117, 0.0001093235573631426, 0.0001120630178675709, 0.0001148708955835745, 0.0001177488889444527, 0.0001206987381375238, 0.0001237222261148625, 0.0001268211796279038, 0.0001299974702864565, 0.0001332530156426779, 0.000136589780300573, 0.0001400097770515943, 0.0001435150680369304, 0.0001471077659370822, 0.0001507900351893399, 0.0001545640932337852, 0.0001584322117884572, 0.0001623967181543322, 0.0001664599965507826, 0.0001706244894821919, 0.0001748926991364176, 0.0001792671888158082, 0.0001837505844014935, 0.0001883455758516826, 0.0001930549187347191, 0.0001978814357976571, 0.0002028280185711365, 0.0002078976290113541, 0.0002130933011799382, 0.0002184181429625552, 0.0002238753378270879, 0.000229468146622247, 0.0002351999094174885, 0.0002410740473851304, 0.0002470940647255779, 0.0002532635506365834, 0.0002595861813274847, 0.0002660657220793853, 0.0002727060293522545, 0.0002795110529399478, 0.0002864848381741627, 0.0002936315281783677, 0.0003009553661727568, 0.0003084606978313053, 0.0003161519736920199, 0.0003240337516214974, 0.000332110699334922, 0.0003403875969726593, 0.0003488693397346168, 0.0003575609405735676, 0.0003664675329486526, 0.0003755943736402954, 0.0003849468456277912, 0.000394530461030845, 0.0004043508641163629, 0.0004144138343718172, 0.0004247252896465309, 0.0004352912893622481, 0.0004461180377943797, 0.0004572118874253364, 0.0004685793423713822, 0.0004802270618844683, 0.0004921618639305244, 0.0005043907288457142, 0.0005169208030721775, 0.0005297594029748138, 0.0005429140187406762, 0.0005563923183625744, 0.0005702021517085072, 0.0005843515546785678, 0.0005988487534509885, 0.000613702168819018, 0.0006289204206203432, 0.0006445123322607974, 0.0006604869353341115, 0.0006768534743395002, 0.0006936214114988848, 0.0007108004316755919, 0.000728400447396377, 0.0007464316039786562, 0.0007649042847648444, 0.0007838291164657253, 
0.0008032169746148008, 0.0008230789891355859, 0.0008434265500238428, 0.0008642713131467647, 0.000885625206161145, 0.0009075004345525851, 0.0009299094877978173, 0.0009528651456522381, 0.0009763804845647674, 0.001000468884222168, 0.001025144034224977, 0.001050419940897224, 0.001076310934232122, 0.001102831674975934, 0.001129997161852239, 0.001157822738928841, 0.001186324103129556, 0.001215517311893155, 0.001245418790981733, 0.001276045342440803, 0.001307414152713394, 0.00133954280091049, 0.001372449267240095, 0.001406151941597288, 0.001440669632317557, 0.001476021575095778, 0.001512227442073167, 0.00154930735109454, 0.001587281875138233, 0.001626172051921013, 0.001665999393680317, 0.001706785897136159, 0.001748554053635017, 0.001791326859478023, 0.00183512782643577, 0.001879980992452016, 0.001925910932538565, 0.0019729427698636, 0.002021102187035701, 0.002070415437585759, 0.002120909357649007, 0.002172611377849311, 0.002225549535387887, 0.002279752486338521, 0.002335249518151397, 0.002392070562367537, 0.002450246207545876, 0.00250980771240488, 0.002570787019180649, 0.002633216767203319, 0.00269713030669358, 0.002762561712781018, 0.002829545799745976, 0.002898118135486532, 0.002968315056212111, 0.003040173681365211, 0.003113731928772614, 0.00318902853002737, 0.003266103046102779, 0.003344995883199465, 0.00342574830882657, 0.003508402468117992, 0.003593001400384442, 0.00367958905590205, 0.003768210312938068, 0.003858910995014132, 0.003951737888407398, 0.004046738759889749, 0.004143962374705104, 0.004243458514784724, 0.004345277997200287, 0.004449472692854267, 0.004556095545407084, 0.004665200590440226, 0.004776842974854442, 0.004891078976501858, 0.005007966024050713, 0.005127562717081186, 0.005249928846410608, 0.005375125414646092, 0.005503214656962429, 0.005634260062102855, 0.005768326393600051, 0.005905479711214483, 0.006045787392586962, 0.006189318155102002, 0.006336142077958316, 0.006486330624442497, 0.006639956664401608, 0.006797094496910198, 0.006957819873126826, 
0.007122210019334975, 0.007290343660162846, 0.0074623010419762, 0.007638163956438089, 0.007818015764228942, 0.008001941418920116, 0.008190027490993648, 0.008382362192000578, 0.008579035398849771, 0.008780138678218825, 0.008985765311078172, 0.009196010317319146, 0.009410970480476219, 0.00963074437253331, 0.0098554323788035, 0.01008513672287111, 0.01031996149158449, 0.01056001266008758, 0.01080539811687758, 0.01105622768887567, 0.01131261316649725, 0.0115746683287075, 0.01184250896804761, 0.01211625291561636, 0.0123960200659914, 0.01268193240207362, 0.01297411401983783, 0.01327269115297189, 0.0135777921973863, 0.01388954773557522, 0.01420809056080945, 0.01453355570114119, 0.01486608044319972, 0.01520580435575635, 0.01555286931303656, 0.01590741951775613, 0.01626960152385768, 0.0166395642589231, 0.01701745904623653, 0.01740343962647202, 0.01779766217897883, 0.01820028534263691, 0.0186114702362538, 0.01903138047847387, 0.01946018220716937, 0.01989804409828241, 0.0203451373840855, 0.02080163587082798, 0.0212677159557341, 0.02174355664331788, 0.02222933956097877, 0.02272524897384118, 0.02323147179879959, 0.02374819761773032, 0.0242756186898295, 0.0248139299630358, 0.02536332908449549, 0.02592401641002573, 0.02649619501253118, 0.02708007068932757, 0.02767585196832445, 0.02828375011301818, 0.02890397912624471, 0.0295367557526402, 0.0301822994797563, 0.03084083253777503, 0.03151257989776706, 0.032197769268435, 0.03289663109128234, 0.03360939853414635, 0.03433630748303183, 0.03507759653218068, 0.03583350697231025, 0.03660428277695179, 0.03739017058681797, 0.03819141969212667, 0.03900828201280573, 0.03984101207650186, 0.04068986699431357, 0.04155510643416686, 0.04243699259174903, 0.04333579015891401, 0.04425176628946995, 0.04518519056225715, 0.04613633494142177, 0.04710547373378798, 0.04809288354322819, 0.04909884322192864, 0.05012363381844376, 0.05116753852243055, 0.05223084260595045, 0.05331383336122327, 0.05441680003471434, 0.05554003375743286, 0.05668382747131583, 
0.05784847585156852, 0.05903427522482911, 0.06024152348302116, 0.06147051999275445, 0.06272156550013051, 0.06399496203080605, 0.06529101278516322, 0.06661002202843228, 0.06795229497560833, 0.06931813767100019, 0.07070785686224526, 0.07212175986862136, 0.07356015444348207, 0.0750233486306388, 0.0765116506145093, 0.07802536856384894, 0.07956481046887724, 0.08113028397160967, 0.08272209618920082, 0.08434055353010272, 0.08598596150283906, 0.08765862451719333, 0.08935884567760723, 0.0910869265685828, 0.09284316703188052, 0.09462786493530419, 0.0964413159328618, 0.09828381321609156, 0.1001556472563411, 0.102057105537789, 0.1039884722809972, 0.1059500281567847, 0.1079420499902153, 0.1099648104544915, 0.1120185777545533, 0.1141036153001814, 0.1162201813684091, 0.1183685287550524, 0.1205489044151729, 0.1227615490922949, 0.1250066969362053, 0.1272845751091711, 0.1295954033804222, 0.1319393937087517, 0.1343167498131007, 0.1367276667310048, 0.139172330364791, 0.1416509170154278, 0.1441635929039476, 0.1467105136803711, 0.1492918239200859, 0.1519076566076425, 0.1545581326079533, 0.1572433601248975, 0.1599634341473541, 0.1627184358827068, 0.165508432177884, 0.1683334749280216, 0.1711936004728555, 0.1740888289809757, 0.1770191638220952, 0.1799845909275113, 0.182985078138959, 0.1860205745460787, 0.1890910098127445, 0.1921962934925179, 0.1953363143335162, 0.1985109395730003, 0.2017200142220019, 0.2049633603403742, 0.2082407761454817, 0.2115520347261892, 0.2148968833007839, 0.2182750433009233, 0.2216862093077148, 0.2251300483388622, 0.228606199070899, 0.2321142710514767, 0.2356538439080046, 0.2392244665508235, 0.2428256563715657, 0.2464568984370105, 0.2501176446786856, 0.2538073130784056, 0.2575252868498588, 0.2612709136162717, 0.2650435045840707, 0.2688423337123356, 0.2726666368777101, 0.2765156110342731, 0.2803884133677013, 0.2842841604428515, 0.2882019273436709, 0.2921407468040967, 0.2960996083283352, 0.3000774572986097, 0.3040731940681396, 0.3080856730367551, 0.3121137017061688, 
0.316156039711506, 0.3202113978252533, 0.32427843692931, 0.3283557669503325, 0.3324419457530364, 0.3365354779855852, 0.340634813870643, 0.3447383479351127, 0.3488444176710357, 0.3529513021195958, 0.3570572203696773, 0.3611603299619844, 0.3652587251893607, 0.3693504352836813, 0.373433422479554, 0.3775055799450881, 0.3815647295702167, 0.3856086196035166, 0.3896349221292074, 0.393641230377067, 0.3976250558594052, 0.4015838253310397, 0.4055148775704246, 0.4094154599827233, 0.413282725028691, 0.417113726486711, 0.4209054155591935, 0.4246546368387071, 0.4283581241536138, 0.4320124963174765, 0.4356142528109869, 0.4391597694294616, 0.4426452939329077, 0.4460669417391131, 0.4494206917030006, 0.4527023820274867, 0.4559077063521867, 0.45903221006647, 0.4620712868925743, 0.4650201757827849, 0.4678739581721745, 0.4706275556252051, 0.473275727910808, 0.4758130715365504, 0.4782340187683835, 0.4805328371584238, 0.4827036295994304, 0.484740334921229, 0.4866367290414328, 0.4883864266804573, 0.4899828836490843, 0.491419399715682, 0.4926891220596057, 0.4937850493172513, 0.4947000362276336, 0.4954267988851465, 0.4959579206082543, 0.4962858584341996, 0.4964029502512966, 0.4963014225819783, 0.4959733990313997, 0.4954109094180317, 0.4946058996042733, 0.4935502420466203, 0.4922357470863433, 0.4906541750029206, 0.4887972488536257, 0.4866566681236725, 0.4842241232121725, 0.4814913107798351, 0.4784499499848498, 0.475091799633713, 0.4714086762739028, 0.4673924732552476, 0.4630351807865717, 0.458328907013735, 0.4532659001444829, 0.4478385716446008, 0.4420395205286827, 0.4358615587673947, 0.4292977378313919, 0.4223413763900427, 0.4149860891807867, 0.4072258170623047, 0.3990548582616663, 0.3904679008222415, 0.3814600562553869, 0.3720268943947148, 0.3621644794471164, 0.3518694072295996, 0.3411388435754006, 0.3299705638867124, 0.3183629938047132, 0.3063152509603565, 0.2938271877615727, 0.2808994351641104, 0.2675334473641951, 0.2537315473414775, 0.2394969731703745, 0.2248339250068506, 
0.2097476126459344, 0.194244303532813, 0.1783313710971733, 0.162017343266583, 0.1453119510001048, 0.1282261766680466, 0.1107723020877564, 0.09296395600871163, 0.07481616082284612, 0.05634537825812863, 0.03756955379491193, 0.01850815952554044, -0.000817764841794279, -0.02038557315386689, -0.04017097649705425, -0.06014800975543047, -0.0802890014225896, -0.1005645470681388, -0.1209434868804022, -0.1413928877272656, -0.1618780301971282, -0.1823624011014565, -0.2028076919392859, -0.2231738038419974, -0.2434188595336235, -0.2634992228575814, -0.2833695264348797, -0.3029827080312371, -0.3222900562209464, -0.3412412659434242, -0.3597845045539265, -0.3778664889725563, -0.3954325745351476, -0.4124268561455213, -0.4287922823205815, -0.4444707827074426, -0.4594034096347959, -0.4735304942386555, -0.4867918176750149, -0.4991267978983611, -0.5104746924449464, -0.5207748176127277, -0.529966784375429, -0.5379907513057498, -0.5447876947117647, -0.5502996961104939, -0.5544702470728674, -0.5572445713742876, -0.5585699642740934, -0.5583961486248352, -0.5566756473777796, -0.5533641719038438, -0.5484210253886245, -0.5418095203857399, -0.5334974094237869, -0.5234573273583131, -0.5116672439408488, -0.4981109248418368, -0.4827783991129472, -0.4656664308065557, -0.4467789921859787, -0.4261277356598452, -0.4037324612576532, -0.379621576131388, -0.3538325422222548, -0.3264123078715226, -0.2974177187829641, -0.2669159033628413, -0.2349846270744705, -0.2017126100510691, -0.1671998018165434, -0.1315576065734313, -0.09490905213552743, -0.05738889521574608, -0.01914365543443479, 0.01966842990246807, 0.05887753848334679, 0.09830249158549369, 0.1377510872097041, 0.1770205464764418, 0.2158980820149944, 0.2541615969853891, 0.2915805231632611, 0.3279168061726883, 0.362926045452416, 0.3963587958670077, 0.4279620370055654, 0.4574808151253759, 0.484660061374472, 0.5092465883439636, 0.5309912651369452, 0.5496513689756795, 0.5649931088842038, 0.5767943141635473, 0.5848472772088208, 0.5889617366933213, 
0.5889679832617741, 0.5847200656369522, 0.5760990704633563, 0.5630164443102194, 0.5454173210668603, 0.523283812532313, 0.4966382143887762, 0.4655460740317063, 0.4301190610033632, 0.3905175751559944, 0.3469530222914716, 0.2996896820442051, 0.2490460883748626, 0.1953958394285999, 0.1391677509112344, 0.08084526580036537, 0.02096503340654016, -0.03988542717981532, -0.1010710600757215, -0.1619136762708536, -0.2216974928971921, -0.279675860672447, -0.3350792164098786, -0.3871242782448621, -0.4350244782231377, -0.4780015999954254, -0.5152985585214758, -0.5461932239636458, -0.570013153534863, -0.5861510533004717, -0.5940807473370895, -0.5933733849441764, -0.5837135687282292, -0.5649150385201859, -0.5369354996852255, -0.4998901411474397, -0.4540633503510662, -0.3999181015531976, -0.338102472974718, -0.2694527407806831, -0.1949953280006498, -0.1159464641614813, -0.0336862701113246, 0.05026867491232625, 0.1342558549216186, 0.2165080750847486, 0.2951824435103374, 0.3683996198893282, 0.4342879524649733, 0.4910325175575206, 0.5369282010265247, 0.570435708062944, 0.590239111739476, 0.595303269770399, 0.5849291721733889, 0.558805043260823, 0.517050825822253, 0.4602535412243284, 0.389490965270454, 0.3063411052197794, 0.2128751269861044, 0.1116316801193784, 0.005571015517078206, -0.1019921039352164, -0.2074769336506771, -0.3071460311296255, -0.397233544448488, -0.4740895819232734, -0.5343364908848872, -0.5750316043003294, -0.5938297909744389, -0.5891380480778737, -0.5602534945979631, -0.5074755596208619, -0.432183011835956, -0.336866844851766, -0.2251110044237336, -0.1015145860142113, 0.02844851856564914, 0.1586334921451396, 0.2824892440579861, 0.3933768697377982, 0.4849303592250651, 0.5514433151735487, 0.5882609885052402, 0.5921530986851359, 0.5616401023072252, 0.4972442532349746, 0.4016373889154147, 0.2796602510561674, 0.138193566887528, -0.01413081946081626, -0.1673798808863442, -0.3109187696907185, -0.4341337309976627, -0.5272379365745713, -0.5821056078697898, 
-0.5930680507189324, -0.5575973740471305, -0.4768013536933642, -0.3556575921462567, -0.2029278826818338, -0.03071495422422015, 0.1463468543008356, 0.3122394006881366, 0.4509793033494314, 0.5481990143661589, 0.5927459139157175, 0.5781282881488422, 0.5036254015465607, 0.3748870360286959, 0.2038792329569057, 0.008088639080731123, -0.1910241615490186, -0.3702372828550798, -0.5072591942205531, -0.5836162133342584, -0.5873773424428963, -0.5153194993544745, -0.3741487266687449, -0.180466020029507, 0.04069796464873946, 0.2587647934935294, 0.4416665151590923, 0.5606092903488885, 0.5948743342973646, 0.5358673819410635, 0.3896226020528837, 0.1771041478105, -0.06806541442979486, -0.3044074469173482, -0.4893909077190348, -0.5872287441150021, -0.5762882774793028, -0.4545788832158946, -0.2419168528586162, 0.02216496280946916, 0.2849271293354954, 0.4905591941123072, 0.5921819940938632, 0.5632192595465502, 0.4053634247286771, 0.1507192113885644, -0.1432448397346297, -0.4056936046613245, -0.5692579334594415, -0.5879233773114214, -0.4512551986309475, -0.1904631475990176, 0.1266040678376505, 0.4116676674413486, 0.5801907427125577, 0.5772435826088568, 0.3971750950896978, 0.0898604291536929, -0.2504772507554465, -0.51261605023188, -0.604733292937672, -0.488262154920263, -0.1965463520604796, 0.1708511943132507, 0.4795548926172398, 0.6089830855576926, 0.5012302272245173, 0.1902490664759976, -0.2037847472962479, -0.5175969457022102, -0.6119380490294066, -0.4359595667025157 + ], + "label": "2P", + "angular_momentum": 1 + }, + { + "index": 3, + "radial_function": [ + 1.872455252464335e-07, 1.911870104159631e-07, 1.952114631507854e-07, 1.993206299034665e-07, 2.035252132788114e-07, 2.078461746893928e-07, 2.122813790682855e-07, 2.1683450560211e-07, 2.215083631916239e-07, 2.263058298334479e-07, 2.312298582474887e-07, 2.362834774523181e-07, 2.414697946213014e-07, 2.467919970156847e-07, 2.522533539631987e-07, 2.578572188876852e-07, 2.636070313906389e-07, 2.695063193859776e-07, 
2.755587012893643e-07, 2.817678882634442e-07, 2.881376865203902e-07, 2.946719996831874e-07, 3.013748312071214e-07, 3.082502868629781e-07, 3.153025772834918e-07, 3.225360205746264e-07, 3.299550449933089e-07, 3.375641916932781e-07, 3.453681175407518e-07, 3.533715980016609e-07, 3.615795301022412e-07, 3.699969354648207e-07, 3.78628963420684e-07, 3.874808942019479e-07, 3.965581422144245e-07, 4.058662593935042e-07, 4.154109386451394e-07, 4.251980173740612e-07, 4.352334811014183e-07, 4.455234671740811e-07, 4.560742685679084e-07, 4.668923377873368e-07, 4.77984290863708e-07, 4.893569114548124e-07, 5.010171550481907e-07, 5.129721532707952e-07, 5.252292183076832e-07, 5.377958474324804e-07, 5.506797276524178e-07, 5.63888740470822e-07, 5.774309667700069e-07, 5.913146918175917e-07, 6.05548410399344e-07, 6.201408320817267e-07, 6.351008866074065e-07, 6.504377294270642e-07, 6.661607473709296e-07, 6.822795644635529e-07, 6.988040478854098e-07, 7.157443140850287e-07, 7.331107350454229e-07, 7.50913944708704e-07, 7.69164845562849e-07, 7.878746153946983e-07, 8.070547142133572e-07, 8.267168913482846e-07, 8.468731927264572e-07, 8.675359683331066e-07, 8.887178798606445e-07, 9.104319085504988e-07, 9.32691363232712e-07, 9.555098885682662e-07, 9.789014734992296e-07, 1.002880459911944e-06, 1.027461551518601e-06, 1.052659822962706e-06, 1.07849072915403e-06, 1.104970114838836e-06, 1.132114224411277e-06, 1.159939711972022e-06, 1.188463651640325e-06, 1.217703548125894e-06, 1.247677347567089e-06, 1.278403448642131e-06, 1.309900713960181e-06, 1.342188481739316e-06, 1.375286577778593e-06, 1.409215327731602e-06, 1.443995569689056e-06, 1.479648667078188e-06, 1.516196521886899e-06, 1.553661588220806e-06, 1.592066886201552e-06, 1.631436016214929e-06, 1.671793173517589e-06, 1.713163163211358e-06, 1.755571415594353e-06, 1.799044001898354e-06, 1.843607650422138e-06, 1.889289763070681e-06, 1.936118432310417e-06, 1.984122458550986e-06, 2.033331367964158e-06, 2.083775430750894e-06, 2.135485679867775e-06, 
2.188493930224322e-06, 2.242832798362983e-06, 2.29853572263391e-06, 2.355636983876903e-06, 2.414171726623231e-06, 2.474175980830352e-06, 2.535686684162883e-06, 2.598741704833488e-06, 2.663379865017718e-06, 2.729640964857163e-06, 2.797565807065639e-06, 2.867196222153522e-06, 2.938575094285674e-06, 3.011746387788835e-06, 3.086755174324725e-06, 3.163647660745504e-06, 3.242471217648659e-06, 3.323274408648825e-06, 3.406107020384434e-06, 3.49102009327761e-06, 3.578065953066097e-06, 3.667298243126536e-06, 3.75877195760888e-06, 3.852543475402182e-06, 3.948670594952563e-06, 4.047212569954612e-06, 4.148230145938064e-06, 4.251785597772088e-06, 4.357942768110103e-06, 4.466767106798608e-06, 4.578325711274074e-06, 4.692687367972557e-06, 4.809922594777308e-06, 4.93010368453026e-06, 5.053304749633941e-06, 5.179601767770989e-06, 5.309072628769136e-06, 5.441797182640232e-06, 5.577857288822537e-06, 5.717336866656298e-06, 5.8603219471233e-06, 6.006900725881908e-06, 6.157163617629839e-06, 6.311203311827717e-06, 6.469114829817316e-06, 6.630995583369154e-06, 6.796945434695054e-06, 6.967066757962064e-06, 7.141464502345124e-06, 7.320246256656705e-06, 7.503522315592647e-06, 7.691405747634357e-06, 7.884012464648514e-06, 8.081461293226488e-06, 8.28387404780663e-06, 8.491375605623756e-06, 8.704093983531144e-06, 8.922160416741538e-06, 9.145709439534784e-06, 9.374878967980857e-06, 9.60981038472827e-06, 9.850648625909103e-06, 1.009754227021306e-05, 1.035064363018435e-05, 1.061010884579647e-05, 1.087609798036123e-05, 1.11487751188299e-05, 1.142830846854565e-05, 1.171487046250798e-05, 1.200863786521122e-05, 1.23097918811208e-05, 1.26185182658525e-05, 1.293500744012156e-05, 1.325945460652991e-05, 1.359205986926184e-05, 1.393302835675975e-05, 1.428257034745373e-05, 1.46409013986201e-05, 1.500824247844635e-05, 1.538482010138136e-05, 1.577086646685199e-05, 1.616661960142901e-05, 1.657232350452729e-05, 1.698822829772737e-05, 1.741459037780744e-05, 1.785167257357725e-05, 1.829974430660725e-05, 
1.87590817559489e-05, 1.92299680269443e-05, 1.971269332422543e-05, 2.020755512900616e-05, 2.071485838077225e-05, 2.123491566347742e-05, 2.176804739635594e-05, 2.231458202946507e-05, 2.287485624407317e-05, 2.344921515801243e-05, 2.403801253611754e-05, 2.464161100587508e-05, 2.526038227841109e-05, 2.589470737494728e-05, 2.654497685885974e-05, 2.721159107347711e-05, 2.789496038575818e-05, 2.859550543599283e-05, 2.931365739367302e-05, 3.004985821968455e-05, 3.080456093497357e-05, 3.157822989584579e-05, 3.237134107605975e-05, 3.318438235587979e-05, 3.401785381825793e-05, 3.487226805231807e-05, 3.574815046432012e-05, 3.664603959628555e-05, 3.756648745247082e-05, 3.851005983387864e-05, 3.947733668100246e-05, 4.046891242500354e-05, 4.148539634752496e-05, 4.252741294935178e-05, 4.359560232813133e-05, 4.469062056537278e-05, 4.581314012295038e-05, 4.696385024933976e-05, 4.81434573958224e-05, 4.935268564289874e-05, 5.059227713715602e-05, 5.186299253884278e-05, 5.316561148040782e-05, 5.450093303626746e-05, 5.586977620407114e-05, 5.72729803977416e-05, 5.871140595257238e-05, 6.018593464267208e-05, 6.169747021105133e-05, 6.324693891265547e-05, 6.48352900706529e-05, 6.646349664629618e-05, 6.813255582268068e-05, 6.984348960273216e-05, 7.159734542176378e-05, 7.339519677494925e-05, 7.523814386006821e-05, 7.71273142358871e-05, 7.906386349654784e-05, 8.104897596234466e-05, 8.30838653872787e-05, 8.516977568378796e-05, 8.730798166506058e-05, 8.949978980534739e-05, 9.174653901870021e-05, 9.404960145657142e-05, 9.64103833247204e-05, 9.883032571988263e-05, 0.0001013109054866674, 0.0001038536360951608, 0.0001064600685397211, 0.0001091317922594647, 0.0001118704360809531, 0.0001146776691835993, 0.0001175552020883299, 0.0001205047876700434, 0.0001235282221944244, 0.0001266273463796807, 0.0001298040464837861, 0.0001330602554178229, 0.0001363979538860316, 0.0001398191715531899, 0.0001433259882399539, 0.0001469205351468131, 0.0001506049961073189, 0.0001543816088712674, 0.000158252666418526, 
0.0001622205183042134, 0.0001662875720359544, 0.000170456294483949, 0.00017472921332461, 0.0001791089185185388, 0.0001835980638236297, 0.0001881993683441036, 0.0001929156181162948, 0.0001977496677320302, 0.0002027044420004567, 0.0002077829376491931, 0.0002129882250656997, 0.0002183234500797793, 0.0002237918357881408, 0.0002293966844219768, 0.0002351413792585279, 0.000241029386577625, 0.0002470642576642214, 0.0002532496308579494, 0.0002595892336507566, 0.0002660868848336966, 0.0002727464966939758, 0.0002795720772633756, 0.0002865677326191954, 0.0002937376692388838, 0.0003010861964095489, 0.0003086177286935637, 0.0003163367884515061, 0.0003242480084236977, 0.0003323561343716328, 0.0003406660277806112, 0.0003491826686249178, 0.0003579111581969171, 0.0003668567220014563, 0.000376024712717001, 0.0003854206132249513, 0.0003950500397086183, 0.0004049187448233665, 0.0004150326209394562, 0.0004253977034591532, 0.0004360201742096975, 0.0004469063649137576, 0.0004580627607390232, 0.0004694960039286239, 0.0004812128975140907, 0.0004932204091126076, 0.000505525674810337, 0.0005181360031336306, 0.0005310588791099715, 0.0005443019684205293, 0.0005578731216462397, 0.0005717803786093561, 0.0005860319728124557, 0.0006006363359769152, 0.0006156021026829091, 0.000630938115113015, 0.0006466534279015511, 0.0006627573130918034, 0.0006792592652033364, 0.0006961690064116211, 0.0007134964918422479, 0.0007312519149820298, 0.0007494457132093403, 0.0007680885734460649, 0.0007871914379335892, 0.0008067655101352761, 0.0008268222607679303, 0.0008473734339647847, 0.0008684310535725778, 0.0008900074295853387, 0.0009121151647175245, 0.0009347671611192043, 0.0009579766272360141, 0.0009817570848166535, 0.001006122376070726, 0.001031086670979776, 0.001056664474764392, 0.001082870635510324, 0.001109720351956542, 0.001137229181448275, 0.001165413048058037, 0.001194288250877728, 0.001223871472484935, 0.001254179787586566, 0.001285230671843014, 0.001317042010876085, 0.00134963210946393, 
0.001383019700926299, 0.001417223956703434, 0.001452264496131976, 0.001488161396421275, 0.001524935202833549, 0.001562606939071335, 0.001601198117875751, 0.001640730751839069, 0.001681227364435175, 0.001722711001271476, 0.001765205241565872, 0.00180873420985243, 0.001853322587919396, 0.001898995626983253, 0.001945779160102487, 0.001993699614834812, 0.002042784026141569, 0.002093060049543048, 0.002144555974528508, 0.002197300738224652, 0.002251323939326354, 0.002306655852293415, 0.002363327441817152, 0.002421370377560591, 0.002480817049176087, 0.002541700581604132, 0.002604054850657139, 0.002667914498891978, 0.002733314951775009, 0.002800292434143347, 0.002868883986966086, 0.002939127484409164, 0.003011061651207529, 0.003084726080348234, 0.00316016125106806, 0.003237408547169193, 0.003316510275656482, 0.003397509685699698, 0.003480450987924205, 0.003565379374033356, 0.003652341036765878, 0.003741383190191436, 0.003832554090347455, 0.00392590305622025, 0.004021480491073363, 0.004119337904125929, 0.004219527932583799, 0.004322104364026003, 0.004427122159149049, 0.004534637474871384, 0.004644707687800227, 0.004757391418062857, 0.004872748553504218, 0.004990840274252621, 0.005111729077655073, 0.005235478803583624, 0.005362154660113921, 0.005491823249576928, 0.005624552594984594, 0.005760412166829977, 0.005899472910262141, 0.006041807272635855, 0.006187489231435873, 0.006336594322575303, 0.006489199669067264, 0.006645384010068763, 0.006805227730295351, 0.006968812889804859, 0.007136223254148082, 0.007307544324884002, 0.007482863370456688, 0.007662269457430684, 0.007845853482081234, 0.00803370820233528, 0.008225928270058738, 0.008422610263685083, 0.008623852721179779, 0.00882975617333458, 0.009040423177385288, 0.009255958350945847, 0.00947646840625129, 0.00970206218470127, 0.00993285069169548, 0.0101689471317515, 0.01041046694389498, 0.01065752783731153, 0.01091024982724872, 0.01116875527115614, 0.01143316890505047, 0.01170361788009201, 0.01198023179935796, 
0.01226314275479724, 0.01255248536435057, 0.01284839680921864, 0.01315101687126047, 0.01346048797050273, 0.01377695520274025, 0.01410056637720657, 0.0144314720542925, 0.0147698255832895, 0.01511578314013354, 0.01546950376512417, 0.01583114940059188, 0.01620088492848602, 0.01657887820785407, 0.01696530011218168, 0.01736032456656176, 0.0177641285846592, 0.01817689230543655, 0.01859879902960456, 0.0190300352557596, 0.01947079071616878, 0.01992125841216182, 0.0203816346490868, 0.0208521190707856, 0.0213329146935426, 0.02182422793945886, 0.0223262686692015, 0.02283925021407662, 0.02336338940737162, 0.02389890661491098, 0.02444602576476733, 0.02500497437606729, 0.0255759835868295, 0.02615928818076976, 0.02675512661300579, 0.02736374103459174, 0.02798537731580974, 0.02862028506814347, 0.0292687176648557, 0.02993093226008914, 0.03060718980640679, 0.03129775507068535, 0.03200289664827172, 0.03272288697530984, 0.03345800233914146, 0.03420852288668146, 0.03497473263066433, 0.03575691945365549, 0.03655537510971647, 0.03737039522361032, 0.03820227928742852, 0.03905133065451731, 0.03991785653057697, 0.04080216796180298, 0.04170457981993396, 0.04262541078406616, 0.04356498331908998, 0.04452362365059855, 0.04550166173611388, 0.04649943123247031, 0.04751726945918985, 0.04855551735767846, 0.04961451944606608, 0.05069462376950812, 0.05179618184575883, 0.05291954860582172, 0.05406508232947467, 0.05523314457546145, 0.05642410010613365, 0.0576383168063203, 0.05887616559619478, 0.06013802033790106, 0.06142425773569317, 0.06273525722933401, 0.06407140088049086, 0.06543307325185634, 0.06682066127871474, 0.06823455413266438, 0.06967514307719713, 0.0711428213148264, 0.072637983825445, 0.07416102719558404, 0.07571234943823293, 0.07729234980287024, 0.07890142857534341, 0.08053998686722443, 0.08220842639425595, 0.08390714924349113, 0.08563655762871733, 0.08739705363374167, 0.089189038943103, 0.09101291455976203, 0.09286908050930745, 0.09475793553020233, 0.09667987674958092, 0.09863529934409158, 
0.1006245961852672, 0.1026481574688893, 0.1047063703277982, 0.1067996184275844, 0.1089282815445828, 0.1110927351255762, 0.1132933498285966, 0.1155304910441992, 0.1178045183965694, 0.1201157852238032, 0.1224646380366905, 0.1248514159553127, 0.12727645012275, 0.1297400630951819, 0.1322425682076438, 0.1347842689146954, 0.1373654581052339, 0.1399864173906785, 0.1426474163657345, 0.1453487118409354, 0.1480905470461465, 0.1508731508042064, 0.1536967366738671, 0.1565615020611899, 0.1594676272985403, 0.1624152746903227, 0.1654045875245866, 0.1684356890496347, 0.1715086814147582, 0.1746236445742249, 0.1777806351536476, 0.1809796852778619, 0.1842208013594488, 0.1875039628470457, 0.1908291209325989, 0.1941961972167256, 0.1976050823313677, 0.2010556345189407, 0.2045476781672034, 0.2080810022991016, 0.2116553590168666, 0.215270461899688, 0.2189259843543164, 0.2226215579179944, 0.2263567705131663, 0.2301311646534653, 0.2339442356005408, 0.2377954294713478, 0.2416841412955955, 0.2456097130231236, 0.2495714314810632, 0.2535685262807218, 0.2576001676742365, 0.2616654643611356, 0.2657634612450648, 0.2698931371410522, 0.2740534024338134, 0.2782430966877313, 0.282460986209295, 0.2867057615629282, 0.2909760350413053, 0.2952703380914218, 0.2995871186978692, 0.3039247387249517, 0.3082814712194886, 0.3126554976763533, 0.317044905269024, 0.3214476840476519, 0.3258617241073967, 0.3302848127300502, 0.334714631249496, 0.3391487512788721, 0.3435846318153891, 0.3480196176733367, 0.3524509360694569, 0.3568756937856739, 0.3612908742401872, 0.3656933345613626, 0.3700798026796994, 0.3744468744406457, 0.37879101074545, 0.383108534727149, 0.3873956289691993, 0.3916483327746855, 0.3958625394944753, 0.4000339939231432, 0.4041582897719488, 0.4082308672286113, 0.4122470106141283, 0.41620184614738, 0.4200903398287755, 0.4239072954547266, 0.4276473527752828, 0.4313049858078249, 0.4348745013202958, 0.4383500374980593, 0.4417255628091039, 0.4449948750829726, 0.4481516008194941, 0.4511891947441224, 
0.4541009396274731, 0.4568799463874778, 0.4595191544934711, 0.4620113326924998, 0.4643490800791934, 0.4665248275317041, 0.4685308395374905, 0.4703592164341476, 0.4720018970920534, 0.4734506620673722, 0.4746971372559238, 0.4757327980806474, 0.4765489742478735, 0.4771368551104038, 0.4774874956785291, 0.477591823323589, 0.4774406452225582, 0.4770246565964183, 0.4763344497997693, 0.4753605243242457, 0.4740932977838036, 0.4725231179558098, 0.4706402759580117, 0.4684350206478295, 0.465897574336846, 0.4630181499197369, 0.4597869695230009, 0.4561942847844879, 0.4522303988796765, 0.4478856904146353, 0.443150639308397, 0.4380158547888145, 0.4324721056256378, 0.426510352722386, 0.420121784184439, 0.4132978529745903, 0.4060303172590807, 0.3983112835369312, 0.3901332526333582, 0.3814891686243334, 0.372372470744171, 0.3627771483115889, 0.3526977986922039, 0.3421296882970682, 0.3310688165977583, 0.3195119831187734, 0.307456857347603, 0.2949020514817356, 0.2818471959100157, 0.2682930173029454, 0.2542414191626187, 0.2396955646577261, 0.2246599615422874, 0.2091405489282149, 0.1931447856512755, 0.1766817399373011, 0.1597621800404257, 0.1423986654875371, 0.1246056385229154, 0.1063995153040935, 0.08779877635425645, 0.06882405572798758, 0.04949822829587915, 0.02984649449951327, 0.009896461871685618, -0.0103217774423702, -0.03077557398840189, -0.05142965402029309, -0.07224605918523722, -0.09318409389778846, -0.1142002814911737, -0.1352483303022205, -0.1562791109140894, -0.1772406458480169, -0.1980781130607093, -0.2187338646670081, -0.2391474623669823, -0.2592557311116054, -0.2789928325903995, -0.2982903601665371, -0.3170774569183736, -0.3352809584696013, -0.3528255623013902, -0.3696340252370661, -0.3856273907709799, -0.400725247876021, -0.4148460228663026, -0.4279073058104098, -0.4398262128835481, -0.4505197859112192, -0.4599054301897767, -0.4679013914674403, -0.4744272727300562, -0.4794045911560771, -0.4827573752819081, -0.4844128020490203, -0.4843018729853075, -0.482360128302494, 
-0.4785283971667213, -0.4727535818188531, -0.4649894725831177, -0.4551975901066149, -0.4433480504178334, -0.4294204475803621, -0.4134047478501587, -0.3953021883239038, -0.3751261720963316, -0.3529031509316387, -0.3286734854055278, -0.3024922713993733, -0.2744301207377338, -0.2445738826685766, -0.2130272918081628, -0.1799115271282742, -0.14536566557382, -0.1095470129882907, -0.07263129422048935, -0.03481268361906553, 0.003696343374692608, 0.04266535711380943, 0.0818465387314527, 0.120975482596174, 0.1597722550239485, 0.197942727070024, 0.2351801978299515, 0.2711673228077153, 0.3055783595114282, 0.3380817394669869, 0.3683429722536052, 0.3960278829229651, 0.4208061792327512, 0.4423553394808251, 0.4603648053540388, 0.4745404571045492, 0.484609340551286, 0.4903246069083814, 0.4914706173212888, 0.4878681543253706, 0.4793796723395892, 0.4659145089096662, 0.4474339678951714, 0.4239561753650959, 0.3955605988774914, 0.3623921113626765, 0.3246644723399092, 0.2826630920493156, 0.2367469386889092, 0.1873494457617768, 0.134978276043165, 0.08021380137716269, 0.02370616392758294, -0.03382920484192568, -0.0916177157919324, -0.1488334920645406, -0.2046090388711463, -0.2580467424160458, -0.3082321888895327, -0.3542492498368048, -0.3951968294302978, -0.4302071125639673, -0.4584650908247196, -0.4792290771469557, -0.4918518505178026, -0.4958020010310605, -0.4906849748284285, -0.4762632503616966, -0.4524750147074661, -0.4194506545073621, -0.3775263339921943, -0.3272539062824321, -0.2694063977642589, -0.2049783229503163, -0.1351801329336137, -0.06142617818990675, 0.01468432051477682, 0.0913936483010772, 0.1668194838242039, 0.2389962318487994, 0.3059234644260829, 0.3656208289906668, 0.4161884463556769, 0.4558714644780451, 0.4831270660520506, 0.496691863517054, 0.4956472709842796, 0.4794801387965256, 0.4481356953295227, 0.4020596865245475, 0.3422265618499569, 0.2701506511470656, 0.1878775337598364, 0.09795323975981225, 0.003369557842383798, -0.09251543718097334, -0.1860814852396603, 
-0.2735734934517818, -0.35124938293654, -0.4155441827456777, -0.4632445878789431, -0.4916666983808717, -0.4988282908013138, -0.4836058546301746, -0.4458658775888225, -0.3865596116760081, -0.3077709190548494, -0.2127078910262316, -0.1056308356237104, 0.00828801893203305, 0.1231741688661095, 0.2327265445624194, 0.3305510539946817, 0.4105384207200283, 0.4672674392226495, 0.4964098130911053, 0.4951086433123969, 0.4622999154849149, 0.3989455224640179, 0.308147937612426, 0.1951210091844806, 0.06699870122404784, -0.06752604525555932, -0.1987271107714711, -0.3165153244206929, -0.4112164076594336, -0.4744043217076449, -0.499723635220171, -0.4836243424736711, -0.4259280585083366, -0.3301474340369417, -0.2034922971409078, -0.05651708685265205, 0.09760570560334157, 0.2441635991839944, 0.3682685775297774, 0.4563994224300647, 0.4979815404565464, 0.4868285300764764, 0.4222002309605242, 0.3093437913750844, 0.1594775626829866, -0.01105410607993743, -0.1823367010718788, -0.3330923483947517, -0.4433242736834649, -0.4970971296453459, -0.4850789298070443, -0.4064418318290014, -0.2697473572146973, -0.09253531780611382, 0.1005062912925828, 0.2807033631162947, 0.4195660083192265, 0.4934162208756996, 0.4877478330779747, 0.4005172368834696, 0.2436212404727749, 0.04202980209657466, -0.1696012323159912, -0.3525488084170822, -0.4710874661797677, -0.4997836173363122, -0.4295457742532802, -0.2709718518828135, -0.05387914153773801, 0.1774287731107132, 0.3726973166279175, 0.4866209005416548, 0.4898517741811003, 0.3774477001661045, 0.1722786714997939, -0.07820570454163168, -0.3119582505118796, -0.4673598866450677, -0.4998861488348805, -0.3959592316309146, -0.1795853799360581, 0.09122387744018579, 0.3386393017631028, 0.486947442034525, 0.4863897373799671, 0.3314349570838935, 0.06659705467084398, -0.2239879611183269, -0.4419381619344689, -0.5080442272222534, -0.3926513279561344, -0.131159638842465, 0.183479232716879, 0.4316620671127718, 0.5123724753884775, 0.3861536541988457, 0.09729107547444414, 
-0.237458709578817, -0.4734910295310395, -0.5009173966774688, -0.2985386345605389, 0.04782690262200534, 0.3769078745068928, 0.525016329791719, 0.4092067917408874, 0.07829114940804717, -0.3006350070741661, -0.5224280926617738, -0.4555575499153309, -0.1251606224129766, 0.2862965701540851, 0.5338482763797501, 0.455903304310532, 0.08570901789721774, -0.3501961990449067, -0.5634611836205874, -0.3954611181937175, 0.0568650962532125, 0.4829370186014508, 0.5648944257950091, 0.2186592188799192, -0.3129889116244797, -0.6157204091085092, -0.4233531654364162, 0.1385439196062137, 0.6102956683860189, 0.5623077018284641, -0.007808711727774371, -0.6109355724930764, -0.6643219504103295, -0.05490436516566257, 0.668007969629802, 0.7540209370107244, 0.02371220445150303, -0.8235268321362492, -0.8292515036235435, 0.1561617381586917, 1.110833706037405, 0.8279526897873001, -0.5926575264851783, -1.523063848747459, -0.5639088374846358, 1.44610261788616, 1.89916964697453, -0.3734665812350208, -2.815488569428936, -1.642767538027539, 2.663876010664339, 4.270177181722614, -0.7636185842576751, -6.813431783988215, -3.581181098415261, 7.98922117585164, 10.96064691367101, -5.498417048526511, -21.43579284760681, -4.658421697895847, 33.40026659724299, 29.08607194153782, -40.85667118415634, -77.68440336538364, 27.35150663714605, 162.3555910904419, 46.67334239222285, -288.0539951206264, -268.0786250286146, 419.9381067501886, 810.6015043438076, -383.916378006141 + ], + "label": "2P", + "angular_momentum": 1 + } + ], + "ps_wfc": [ + { + "index": 0, + "radial_function": [ + 0.0007326567981046568, 0.0007418724836832724, 0.0007512040882226465, 0.0007606530698002286, 0.0007702209048336892, 0.0007799090883116038, 0.0007897191340270411, 0.0007996525748140912, 0.0008097109627873619, 0.0008198958695844982, 0.0008302088866117387, 0.0008406516252925714, 0.000851225717319511, 0.000861932814909046, 0.0008727745910597929, 0.0008837527398138908, 0.0008948689765216923, 0.0009061250381097786, 0.0009175226833523449, 
0.0009290636931460056, 0.000940749870788045, 0.0009525830422581759, 0.0009645650565038409, 0.0009766977857290982, 0.0009889831256871464, 0.001001422995976521, 0.001014019340341024, 0.001026774126973418, 0.001039689348822949, 0.001052767023906729, 0.001066009195625036, 0.001079417933080584, 0.001092995331401804, 0.001106743512070191, 0.001120664623251774, 0.001134760840132744, 0.001149034365259313, 0.001163487428881842, 0.001178122289303296, 0.001192941233232092, 0.001207946576139364, 0.001223140662620743, 0.001238525866762672, 0.001254104592513334, 0.00126987927405825, 0.001285852376200582, 0.00130202639474624, 0.001318403856893824, 0.001334987321629464, 0.001351779380126636, 0.001368782656150996, 0.001385999806470312, 0.00140343352126955, 0.001421086524571174, 0.001438961574660748, 0.001457061464517865, 0.001475389022252515, 0.001493947111546936, 0.001512738632103017, 0.00153176652009533, 0.001551033748629862, 0.001570543328208504, 0.0015902983071994, 0.001610301772313191, 0.001630556849085263, 0.001651066702364048, 0.001671834536805472, 0.001692863597373617, 0.001714157169847678, 0.001735718581335305, 0.001757551200792379, 0.001779658439549342, 0.001802043751844141, 0.001824710635361863, 0.001847662631781177, 0.001870903327327618, 0.001894436353333851, 0.001918265386806968, 0.001942394151002923, 0.001966826416008189, 0.00199156599932872, 0.002016616766486329, 0.002041982631622557, 0.002067667558110139, 0.002093675559172156, 0.002120010698508964, 0.002146677090933016, 0.002173678903011658, 0.002201020353718008, 0.002228705715090023, 0.002256739312897835, 0.002285125527319495, 0.002313868793625202, 0.002342973602870131, 0.002372444502595987, 0.002402286097541349, 0.00243250305036097, 0.002463100082354102, 0.002494081974201988, 0.002525453566714619, 0.002557219761586864, 0.00258938552216413, 0.002621955874217625, 0.002654935906729369, 0.002688330772687089, 0.002722145689889064, 0.002756385941759135, 0.002791056878171918, 0.002826163916288407, 0.002861712541402083, 
0.002897708307795632, 0.002934156839608454, 0.00297106383171506, 0.003008435050614508, 0.003046276335331028, 0.003084593598325938, 0.00312339282642104, 0.003162680081733614, 0.003202461502623148, 0.003242743304649988, 0.003283531781545992, 0.003324833306197419, 0.00336665433164014, 0.003409001392067362, 0.003451881103850013, 0.00349530016656993, 0.003539265364066039, 0.003583783565493677, 0.0036288617263972, 0.0036745068897961, 0.003720726187284711, 0.003767526840145773, 0.003814916160477953, 0.003862901552337533, 0.003911490512894451, 0.003960690633602815, 0.004010509601386164, 0.004060955199837568, 0.00411203531043483, 0.004163757913770931, 0.004216131090799903, 0.004269163024098368, 0.004322861999142904, 0.004377236405603439, 0.00443229473865289, 0.004488045600293217, 0.004544497700698143, 0.004601659859572713, 0.004659541007529904, 0.004718150187484538, 0.004777496556064638, 0.004837589385040535, 0.004898438062771874, 0.004960052095672785, 0.005022441109695443, 0.005085614851832196, 0.005149583191636576, 0.005214356122763362, 0.005279943764527955, 0.00534635636348533, 0.005413604295028726, 0.005481698065008456, 0.005550648311370941, 0.005620465805818336, 0.00569116145548894, 0.005762746304658633, 0.005835231536463713, 0.005908628474645235, 0.005982948585315246, 0.006058203478745168, 0.006134404911176504, 0.006211564786654337, 0.006289695158883677, 0.006368808233109122, 0.00644891636801806, 0.00653003207766761, 0.006612168033435829, 0.006695337065997188, 0.00677955216732287, 0.006864826492706092, 0.006951173362812665, 0.007038606265757384, 0.007127138859206201, 0.007216784972504815, 0.007307558608833857, 0.007399473947390914, 0.007492545345600033, 0.007586787341348624, 0.007682214655252455, 0.007778842192948925, 0.00787668504741889, 0.007975758501337657, 0.008076078029455168, 0.008177659301005998, 0.008280518182149456, 0.008384670738439992, 0.008490133237328666, 0.008596922150695584, 0.008705054157414082, 0.008814546145946851, 0.008925415216974325, 
0.009037678686056032, 0.009151354086324906, 0.009266459171215283, 0.009383011917224883, 0.009501030526711063, 0.009620533430722073, 0.009741539291863367, 0.009864067007199691, 0.009988135711193263, 0.01011376477867836, 0.01024097382787315, 0.01036978272342865, 0.01050021157951579, 0.01063228076295075, 0.01076601089635901, 0.01090142286137887, 0.01103853780190453, 0.01117737712736949, 0.01131796251607071, 0.01146031591853375, 0.01160445956091998, 0.01175041594847559, 0.01189820786902358, 0.01204785839649886, 0.01219939089452702, 0.01235282902004751, 0.01250819672698147, 0.01266551826994488, 0.01282481820800766, 0.01298612140849899, 0.01314945305085985, 0.01331483863054269, 0.01348230396295952, 0.01365187518747839, 0.01382357877146915, 0.01399744151439904, 0.0141734905519785, 0.01435175336035808, 0.01453225776037687, 0.01471503192186287, 0.01490010436798651, 0.01508750397966711, 0.01527726000003363, 0.0154694020389399, 0.01566396007753506, 0.01586096447288998, 0.01606044596268005, 0.01626243566992528, 0.01646696510778812, 0.0166740661844297, 0.01688377120792542, 0.01709611289124003, 0.01731112435726347, 0.01752883914390771, 0.01774929120926548, 0.01797251493683155, 0.01819854514078717, 0.01842741707134855, 0.01865916642017994, 0.018893829325872, 0.01913144237948637, 0.01937204263016686, 0.01961566759081836, 0.01986235524385386, 0.02011214404701051, 0.02036507293923545, 0.02062118134664202, 0.02088050918853731, 0.02114309688352169, 0.02140898535566092, 0.02167821604073205, 0.02195083089254334, 0.02222687238932934, 0.02250638354022183, 0.02278940789179724, 0.02307598953470159, 0.02336617311035346, 0.02366000381772593, 0.02395752742020843, 0.0242587902525488, 0.02456383922787701, 0.0248727218448107, 0.02518548619464379, 0.02550218096861881, 0.02582285546528355, 0.02614755959793334, 0.02647634390213894, 0.02680925954336181, 0.02714635832465678, 0.02748769269446319, 0.02783331575448549, 0.02818328126766369, 0.02853764366623475, 0.02889645805988568, 0.02925978024399885, 
0.02962766670799073, 0.03000017464374427, 0.03037736195413638, 0.03075928726166059, 0.03114600991714605, 0.03153759000857354, 0.03193408836998908, 0.03233556659051611, 0.03274208702346675, 0.03315371279555284, 0.03357050781619778, 0.03399253678694927, 0.03441986521099422, 0.03485255940277612, 0.03529068649771551, 0.03573431446203445, 0.0361835121026852, 0.03663834907738406, 0.0370988959047507, 0.0375652239745535, 0.03803740555806179, 0.03851551381850474, 0.03899962282163828, 0.03948980754641986, 0.0399861438957915, 0.04048870870757185, 0.04099757976545734, 0.04151283581013271, 0.04203455655049135, 0.04256282267496581, 0.04309771586296796, 0.04363931879644008, 0.04418771517151601, 0.04474298971029289, 0.0453052281727136, 0.04587451736855928, 0.0464509451695526, 0.04703460052157114, 0.04762557345697072, 0.04822395510701862, 0.04882983771443591, 0.04944331464604929, 0.0500644804055508, 0.05069343064636595, 0.05133026218462919, 0.0519750730122654, 0.05262796231017786, 0.05328903046154056, 0.05395837906519472, 0.05463611094914799, 0.05532233018417498, 0.0560171420975184, 0.05672065328668879, 0.05743297163336164, 0.05815420631737017, 0.05888446783079158, 0.05962386799212559, 0.0603725199605625, 0.06113053825033882, 0.06189803874517838, 0.06267513871281534, 0.06346195681959788, 0.06425861314516845, 0.06506522919721806, 0.0658819279263115, 0.0667088337407791, 0.06754607252167272, 0.06839377163778118, 0.06925205996070088, 0.0701210678799584, 0.07100092731817853, 0.07189177174629527, 0.07279373619879857, 0.0737069572890128, 0.07463157322440082, 0.07556772382188703, 0.07651555052319464, 0.07747519641018898, 0.0784468062202208, 0.07943052636146229, 0.08042650492822728, 0.08143489171626883, 0.08245583823804492, 0.0834894977379434, 0.08453602520745773, 0.08559557740030269, 0.08666831284746045, 0.0877543918721468, 0.08885397660468543, 0.08996723099727907, 0.09109432083866602, 0.09223541376864716, 0.09339067929247291, 0.09456028879507397, 0.09574441555512228, 0.09694323475890697, 
0.09815692351400863, 0.09938566086275653, 0.1006296277954504, 0.1018890072633288, 0.1031639841912665, 0.1044547454901789, 0.1057614800691148, 0.1070843788470157, 0.1084236347641185, 0.1097794427929796, 0.1111519999490953, 0.1125415053010927, 0.1139481599804665, 0.1153721671908327, 0.1168137322166714, 0.1182730624315287, 0.1197503673056461, 0.1212458584129875, 0.1227597494376267, 0.1242922561794643, 0.1258435965592339, 0.1274139906227632, 0.1290036605444483, 0.1306128306299022, 0.1322417273177333, 0.1338905791804106, 0.1355596169241695, 0.1372490733879112, 0.1389591835410444, 0.1406901844802201, 0.1424423154249029, 0.1442158177117264, 0.1460109347875728, 0.1478279122013168, 0.1496669975941735, 0.1515284406885818, 0.1534124932755604, 0.1553194092004636, 0.1572494443470658, 0.1592028566199001, 0.1611799059247708, 0.163180854147362, 0.1652059651298567, 0.1672555046454794, 0.1693297403708728, 0.1714289418562138, 0.1735533804929739, 0.1757033294792216, 0.1778790637823631, 0.1800808600992151, 0.182308996813294, 0.1845637539492101, 0.1868454131240437, 0.1891542574955801, 0.1914905717072747, 0.1938546418298138, 0.1962467552991349, 0.1986672008507627, 0.2011162684503126, 0.2035942492200103, 0.206101435361067, 0.2086381200717486, 0.211204597460969, 0.2138011624572319, 0.2164281107127418, 0.2190857385024943, 0.2217743426181557, 0.2244942202565302, 0.2272456689024096, 0.2300289862055912, 0.2328444698518446, 0.235692417427601, 0.2385731262781297, 0.2414868933589594, 0.2444340150802979, 0.2474147871441869, 0.2504295043741337, 0.2534784605369408, 0.2565619481564536, 0.2596802583189367, 0.2628336804697765, 0.2660225022012055, 0.2692470090307293, 0.27250748416993, 0.2758042082833109, 0.2791374592368361, 0.282507511835812, 0.2859146375517439, 0.2893591042377938, 0.292841175832456, 0.2963611120510519, 0.2999191680646435, 0.3035155941659463, 0.3071506354218171, 0.310824531311882, 0.3145375153528526, 0.318289814708079, 0.3220816497818678, 0.325913233798088, 0.3297847723625772, 
0.333696463008845, 0.3376484947265704, 0.3416410474723686, 0.3456742916623014, 0.3497483876455935, 0.353863485159001, 0.3580197227612838, 0.3622172272472104, 0.3664561130405244, 0.3707364815652914, 0.3750584205950375, 0.3794220035790864, 0.3838272889454949, 0.38827431937998, 0.3927631210802336, 0.3972937029850046, 0.4018660559773453, 0.4064801520613974, 0.411135943512112, 0.4158333619972852, 0.4205723176713028, 0.4253526982399936, 0.4301743679959895, 0.4350371668240091, 0.4399409091754869, 0.4448853830119824, 0.4498703487168267, 0.4548955379744736, 0.4599606526170504, 0.4650653634376238, 0.4702093089697268, 0.4753920942327229, 0.4806132894426196, 0.4858724286879818, 0.4911690085706402, 0.4965024868109364, 0.5018722808173045, 0.5072777662200418, 0.5127182753691891, 0.5181930957965113, 0.5237014686416441, 0.5292425870425583, 0.5348155944905842, 0.5404195831503327, 0.5460535921449633, 0.5517166058073563, 0.5574075518978799, 0.5631252997895665, 0.5688686586216641, 0.5746363754226811, 0.5804271332041995, 0.5862395490269273, 0.5920721720406323, 0.5979234814998051, 0.6037918847571311, 0.6096757152370487, 0.615573230391955, 0.621482609643846, 0.627401952314471, 0.633329275547377, 0.6392625122255091, 0.6451995088883884, 0.6511380236532052, 0.6570757241445573, 0.6630101854379293, 0.6689388880224201, 0.6748592157886588, 0.6807684540482846, 0.6866637875918418, 0.6925422987924381, 0.6984009657630101, 0.7042366605756024, 0.7100461475515996, 0.7158260816324528, 0.7215730068410392, 0.7272833548444184, 0.732953443629408, 0.7385794763030664, 0.7441575400308669, 0.7496836051260625, 0.7551535243044665, 0.7605630321196393, 0.7659077445942195, 0.7711831590639369, 0.7763846542516234, 0.7815074905893479, 0.7865468108076055, 0.7914976408113066, 0.7963548908631064, 0.8011133570954403, 0.8057677233733931, 0.810312563531331, 0.8147423440069554, 0.8190514268971639, 0.8232340734607849, 0.8272844480938788, 0.8311966228038805, 0.8349645822093585, 0.8385822290925944, 0.842043390532521, 
0.8453418246457852, 0.8484712279638078, 0.851425243473679, 0.8541974693505444, 0.8567814684087727, 0.8591707782986405, 0.8613589224744952, 0.8633394219593484, 0.8651058079295699, 0.8666516351417805, 0.8679704962221614, 0.869056036836151, 0.8699019717538857, 0.8705021018237089, 0.87085033186259, 0.8709406894683412, 0.8707673447540288, 0.8703246309999446, 0.8696070662128613, 0.8686093755760286, 0.8673265147664154, 0.8657536941080403, 0.8638864035218117, 0.861720438223078, 0.859251925108042, 0.856477349759272, 0.853393583988708, 0.8499979138238112, 0.8462880678287749, 0.842262245638015, 0.8379191465634508, 0.8332579981203739, 0.8282785842990014, 0.8229812733900839, 0.8173670451532724, 0.8114375170963313, 0.8051949696117802, 0.7986423696952627, 0.7917833929468964, 0.784622443533223, 0.7771646717633056, 0.7694159889080703, 0.7613830788675174, 0.7530734062660278, 0.744495220532002, 0.7356575554947891, 0.726570224009629, 0.7172438071005643, 0.7076896370924123, 0.6979197741864257, 0.6879469759208208, 0.6777846589474026, 0.667446852549985, 0.6569481433296531, 0.6463036104871738, 0.635528751144866, 0.624639395169787, 0.6136516089884614, 0.6025815879213523, 0.5914455366141911, 0.5802595372042668, 0.569039404933949, 0.5578005310125479, 0.5465577126321691, 0.5353249701648968, 0.5241153517087527, 0.512940725309458, 0.5018115593655355, 0.4907366919264777, 0.4797230898187422, 0.4687755987829088, 0.4578974352706099, 0.447093077444211, 0.4363674559971504, 0.4257254121294227, 0.4151716910399461, 0.404710935499643, 0.3943476795245495, 0.3840863421691225, 0.3739312214607297, 0.3638864884970675, 0.3539561817289328, 0.3441442014513292, 0.3344543045262986, 0.324890099361078, 0.3154550411651461, 0.3061524275094077, 0.2969853942101068, 0.2879569115590331, 0.2790697809201509, 0.2703266317109191, 0.261729918784279, 0.2532819202245788, 0.2449847355676227, 0.2368402844516299, 0.2288503057022588, 0.2210163568510869, 0.2133398140831597, 0.2058218726055535, 0.1984635474254624, 0.1912656745232305, 
0.1842289124031042, 0.1773537440023521, 0.1706404789378337, 0.1640892560681068, 0.1577000463487357, 0.1514726559585547, 0.1454067296751852, 0.1395017544790294, 0.1337570633661707, 0.1281718393520159, 0.1227451196490225, 0.117475800003391, 0.1123626391770916, 0.1074042635629868, 0.1025991719220554, 0.09794574023279894, 0.09344222664379612, 0.08908677652106062, 0.0848774275823569, 0.08081211511094663, 0.07688867724139231, 0.07310486031005288, 0.06945832426279122, 0.06594664811219547, 0.0625673354363219, 0.059317819910612, 0.05619547086424664, 0.05319759885178949, 0.05032146123056016, 0.04756426773377823, 0.04492318602914504, 0.04239534725219314, 0.03997785150344197, 0.03766777329816068, 0.03546216695736096, 0.03335807192853002, 0.03135251802456817, 0.02944253056942155, 0.02762513543899727, 0.02589736398611831, 0.02425625783851661, 0.02269887355917488, 0.02122228715870825, 0.0198235984499238, 0.01849993523520555, 0.01724845731794185, 0.01606636032983634, 0.01495087936661895, 0.01389929242539452, 0.01290892363762827, 0.01197714629256485, 0.01110138564670424, 0.01027912151580862, 0.00950789064678274, 0.008785288867651194, 0.008108973014741765, 0.007476662637070715, 0.006886141478805549, 0.006335258741548955, 0.0058219301290379, 0.005344138677678987, 0.004899935377139804, 0.004487439585981231, 0.004104839248042677, 0.003750390915976883, 0.003422419588969117, 0.003119318372263861, 0.0028395479666574, 0.002581635996594272, 0.002344176185927313, 0.002125827390763242, 0.001925312499117178, 0.00174141720733956, 0.001572988683457271, 0.00141893412768774, 0.001278219240441158, 0.001149866608122779, 0.001032954016986381, 0.0009266127051732982, 0.000830025562901455, 0.0007424252905484189, 0.0006630925241046621, 0.0005913539371615143, 0.0005265803282462763, 0.0004681847019285781, 0.0004156203517013193, 0.0003683789521905884, 0.0003259886677760365, 0.0002880122842105875, 0.0002540453693202501, 0.0002237144683454482, 0.0001966753389586983, 0.0001726112304635633, 
0.0001512312111503379, 0.0001322685472583229, 0.0001154791364760257, 0.0001006399984020041, 8.754782389283852e-05, 7.601758474287825e-05, 6.588120467451082e-05, 5.698629216871205e-05, 4.919493523389763e-05, 4.238255779622065e-05, 3.643683699521201e-05, 3.125668028282036e-05, 2.675126084807199e-05, 2.283910951891865e-05, 1.944726092080441e-05, 1.651045128921984e-05, 1.39703649323465e-05, 1.177492587661387e-05, 9.8776311233219e-06, 8.236916256783757e-06, 6.815592465775609e-06, 5.580080621321123e-06, 4.504806879247951e-06, 3.539724597409186e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "1S", + "angular_momentum": 0 + }, + { + "index": 1, + "radial_function": [ + 0.0004046524429257462, 0.0004097423409275699, 0.0004148962618446819, 0.0004201150109816314, 0.0004253994037722769, 0.0004307502659071897, 0.0004361684334626606, 0.0004416547530313302, 0.0004472100818544584, 0.0004528352879558635, 0.000458531250277538, 0.0004642988588169764, 0.0004701390147662263, 0.0004760526306526905, 
0.0004820406304816992, 0.0004881039498808727, 0.0004942435362463036, 0.000500460348890575, 0.0005067553591926396, 0.0005131295507495872, 0.000519583919530315, 0.0005261194740311353, 0.0005327372354333384, 0.0005394382377627359, 0.0005462235280512129, 0.0005530941665003078, 0.000560051226646853, 0.0005670957955306969, 0.0005742289738645337, 0.0005814518762058728, 0.0005887656311311653, 0.000596171381412124, 0.0006036702841942593, 0.00061126351117766, 0.0006189522488000485, 0.0006267376984221354, 0.0006346210765153062, 0.0006426036148516689, 0.0006506865606964894, 0.000658871177003049, 0.0006671587426099485, 0.0006755505524408958, 0.0006840479177070048, 0.0006926521661116381, 0.0007013646420578264, 0.0007101867068582911, 0.0007191197389481135, 0.000728165134100074, 0.0007373243056427007, 0.000746598684681059, 0.0007559897203203149, 0.0007654988798921124, 0.0007751276491837948, 0.0007848775326705072, 0.0007947500537502218, 0.0008047467549817089, 0.0008148691983255082, 0.0008251189653879249, 0.0008354976576680931, 0.0008460068968081498, 0.0008566483248465448, 0.0008674236044745435, 0.0008783344192959492, 0.0008893824740900912, 0.000900569495078122, 0.0009118972301926536, 0.0009233674493507916, 0.0009349819447305972, 0.0009467425310510217, 0.0009586510458553679, 0.0009707093497983014, 0.0009829193269364819, 0.000995282885022837, 0.001007801955804542, 0.001020478495324741, 0.001033314484228058, 0.001046311928069951, 0.001059472857629951, 0.001072799329228839, 0.001086293425049811, 0.001099957253463666, 0.001113792949358103, 0.001127802674471131, 0.001141988617728684, 0.001156352995586478, 0.001170898052376146, 0.00118562606065575, 0.001200539321564666, 0.001215640165182952, 0.001230930950895218, 0.001246414067759067, 0.001262091934878169, 0.001277967001780021, 0.001294041748798443, 0.001310318687460895, 0.001326800360880642, 0.001343489344153853, 0.001360388244761685, 0.001377499702977417, 0.001394826392278701, 0.001412371019764977, 0.001430136326580143, 
0.001448125088340529, 0.001466340115568236, 0.001484784254129931, 0.001503460385681133, 0.001522371428116091, 0.001541520336023304, 0.001560910101146753, 0.001580543752852942, 0.00160042435860377, 0.00162055502443537, 0.001640938895442934, 0.001661579156271631, 0.001682479031613696, 0.001703641786711732, 0.001725070727868346, 0.001746769202962175, 0.001768740601970382, 0.001790988357497712, 0.001813515945312176, 0.001836326884887465, 0.001859424739952152, 0.0018828131190458, 0.001906495676082032, 0.001930476110918656, 0.001954758169934953, 0.001979345646616196, 0.002004242382145486, 0.002029452266003031, 0.002054979236572908, 0.002080827281757448, 0.002107000439599316, 0.002133502798911381, 0.00216033849991449, 0.002187511734883212, 0.002215026748799689, 0.002242887840015668, 0.002271099360922825, 0.002299665718631491, 0.00232859137565786, 0.002357880850619814, 0.002387538718941457, 0.002417569613566461, 0.002447978225680349, 0.002478769305441805, 0.00250994766272314, 0.002541518167860024, 0.00257348575241059, 0.002605855409924042, 0.002638632196718852, 0.002671821232670717, 0.00270542770201034, 0.002739456854131196, 0.002773914004407398, 0.002808804535021758, 0.002844133895804221, 0.002879907605080756, 0.002916131250532843, 0.002952810490067721, 0.002989951052699454, 0.003027558739441032, 0.003065639424207591, 0.003104199054730895, 0.00314324365348525, 0.00318277931862491, 0.003222812224933257, 0.00326334862478372, 0.003304394849112734, 0.003345957308404813, 0.003388042493689845, 0.003430656977552884, 0.003473807415156447, 0.003517500545275574, 0.003561743191345766, 0.003606542262523911, 0.003651904754762504, 0.003697837751897112, 0.003744348426747431, 0.003791444042232005, 0.003839131952496757, 0.003887419604057607, 0.003936314536957196, 0.003985824385936014, 0.004035956881618057, 0.004086719851711125, 0.004138121222222111, 0.004190169018687235, 0.004242871367417581, 0.004296236496760069, 0.004350272738373946, 0.004404988528523205, 0.004460392409384855, 
0.004516493030373448, 0.004573299149481952, 0.004630819634639138, 0.00468906346508381, 0.004748039732755905, 0.004807757643704795, 0.004868226519514959, 0.004929455798749142, 0.00499145503840941, 0.005054233915416047, 0.005117802228104741, 0.005182169897742147, 0.005247346970060006, 0.005313343616808222, 0.005380170137326871, 0.005447836960137551, 0.005516354644554241, 0.005585733882313804, 0.005655985499226565, 0.005727120456846962, 0.005799149854164649, 0.005872084929316293, 0.005945937061318136, 0.006020717771819836, 0.00609643872687952, 0.006173111738760535, 0.006250748767750024, 0.00632936192399953, 0.006408963469388089, 0.00648956581940774, 0.006571181545071993, 0.006653823374847388, 0.006737504196608306, 0.006822237059615549, 0.006908035176518649, 0.006994911925382372, 0.007082880851737646, 0.007171955670657043, 0.007262150268855331, 0.007353478706815094, 0.007445955220937885, 0.007539594225721123, 0.007634410315960907, 0.007730418268981281, 0.007827633046889881, 0.007926069798860545, 0.008025743863443001, 0.008126670770899902, 0.008228866245571658, 0.00833234620826911, 0.008437126778694511, 0.008543224277891044, 0.008650655230721109, 0.008759436368373773, 0.008869584630901543, 0.008981117169786858, 0.009094051350538573, 0.009208404755318589, 0.009324195185599174, 0.009441440664850968, 0.009560159441262213, 0.009680369990489346, 0.009802091018439277, 0.009925341464083704, 0.01005014050230563, 0.01017650754677848, 0.01030446225287808, 0.0104340245206277, 0.0105652144976766, 0.01069805258231214, 0.01083255942650601, 0.0109687559389946, 0.01110666328839396, 0.01124630290634953, 0.01138769649072091, 0.01153086600880208, 0.01167583370057708, 0.01182262208201165, 0.01197125394838096, 0.0121217523776337, 0.01227414073379283, 0.01242844267039315, 0.012584682133956, 0.0127428833675013, 0.01290307091409711, 0.01306526962044705, 0.01322950464051564, 0.01339580143919187, 0.01356418579599121, 0.01373468380879615, 0.01390732189763553, 0.01408212680850287, 
0.01425912561721368, 0.01443834573330222, 0.01461981490395746, 0.01480356121799866, 0.01498961310989071, 0.01517799936379895, 0.01536874911768411, 0.01556189186743698, 0.01575745747105312, 0.01595547615284763, 0.01615597850770991, 0.01635899550539848, 0.0165645584948759, 0.01677269920868368, 0.01698344976735715, 0.01719684268388023, 0.01741291086818011, 0.01763168763166144, 0.01785320669178021, 0.01807750217665705, 0.0183046086297295, 0.01853456101444356, 0.01876739471898361, 0.01900314556104105, 0.01924184979262087, 0.01948354410488606, 0.01972826563303932, 0.01997605196124175, 0.02022694112756804, 0.02048097162899759, 0.02073818242644101, 0.02099861294980159, 0.02126230310307075, 0.02152929326945716, 0.02179962431654853, 0.02207333760150538, 0.02235047497628589, 0.02263107879290098, 0.02291519190869864, 0.02320285769167639, 0.02349412002582097, 0.0237890233164738, 0.02408761249572128, 0.02438993302780841, 0.0246960309145744, 0.02500595270090896, 0.02531974548022723, 0.0256374568999624, 0.02595913516707371, 0.02628482905356826, 0.02661458790203475, 0.02694846163118681, 0.02728650074141423, 0.02762875632033942, 0.02797528004837698, 0.02832612420429396, 0.0286813416707677, 0.02904098593993924, 0.02940511111895874, 0.02977377193552034, 0.0301470237433832, 0.03052492252787496, 0.03090752491137485, 0.03129488815877222, 0.03168707018289699, 0.03208412954991782, 0.03248612548470384, 0.0328931178761457, 0.03330516728243111, 0.03372233493627033, 0.03414468275006644, 0.03457227332102496, 0.0350051699361978, 0.03544343657745551, 0.03588713792638173, 0.03633633936908408, 0.0367911070009142, 0.03725150763109111, 0.03771760878721998, 0.03818947871969916, 0.03866718640600819, 0.03915080155486778, 0.03964039461026441, 0.04013603675533009, 0.0406377999160684, 0.04114575676491757, 0.04165998072413982, 0.04218054596902789, 0.04270752743091704, 0.04324100079999168, 0.04378104252787517, 0.04432772982999009, 0.04488114068767706, 0.0454413538500585, 0.04600844883563372, 
0.04658250593359151, 0.04716360620482456, 0.04775183148263137, 0.04834726437308891, 0.04894998825507963, 0.04956008727995549, 0.05017764637082126, 0.05080275122141756, 0.05143548829458546, 0.05207594482029131, 0.05272420879319142, 0.0533803689697148, 0.0540445148646406, 0.05471673674714782, 0.05539712563631193, 0.05608577329602338, 0.05678277222930195, 0.05748821567197866, 0.05820219758571789, 0.05892481265034973, 0.05965615625548171, 0.06039632449135891, 0.06114541413893859, 0.06190352265914587, 0.06267074818127481, 0.06344718949049796, 0.06423294601444651, 0.06502811780882103, 0.0658328055419927, 0.06664711047855128, 0.06747113446175707, 0.06830497989485042, 0.06914874972117116, 0.07000254740304028, 0.07086647689935141, 0.07174064264182088, 0.07262514950984111, 0.07352010280388091, 0.07442560821737462, 0.07534177180703959, 0.07626869996155894, 0.07720649936856559, 0.0781552769798594, 0.07911513997478926, 0.08008619572172765, 0.08106855173756328, 0.08206231564513626, 0.08306759512853434, 0.08408449788617031, 0.08511313158155465, 0.08615360379167569, 0.08720602195289773, 0.08827049330428186, 0.08934712482823498, 0.09043602318838564, 0.09153729466458425, 0.09265104508492217, 0.09377737975465804, 0.09491640338194042, 0.09606822000020848, 0.09723293288715121, 0.09841064448010065, 0.09960145628773086, 0.1008054687979322, 0.1020227813817234, 0.1032534921930633, 0.104497698064417, 0.1057554943979286, 0.1070269750520489, 0.1083122322234604, 0.1096113563241382, 0.1109244358533824, 0.1122515572646503, 0.113592804827014, 0.1149482604810649, 0.1163180036890787, 0.1177021112792547, 0.119100657283832, 0.1205137127708882, 0.1219413456696135, 0.1233836205888538, 0.1248405986287097, 0.1263123371849713, 0.127798889746169, 0.1293003056830105, 0.1308166300299718, 0.1323479032588063, 0.1338941610437283, 0.1354554340180266, 0.1370317475218566, 0.1386231213409561, 0.1402295694360258, 0.1418510996625118, 0.1434877134805225, 0.1451394056546121, 0.146806163943154, 0.1484879687770316, 
0.1501847929273641, 0.1518966011619889, 0.1536233498904168, 0.1553649867969758, 0.1571214504618595, 0.1588926699697924, 0.1606785645060307, 0.162479042939412, 0.1642940033921722, 0.1661233327962514, 0.1679669064358091, 0.1698245874756804, 0.1716962264755034, 0.1735816608892574, 0.1754807145499614, 0.1773931971392832, 0.1793189036418305, 0.1812576137838964, 0.1832090914564518, 0.1851730841221894, 0.1871493222064382, 0.1891375184717914, 0.1911373673763054, 0.1931485444151531, 0.1951707054456392, 0.1972034859955115, 0.1992465005545339, 0.2012993418493178, 0.2033615801014464, 0.2054327622689617, 0.2075124112713304, 0.2096000251980477, 0.2116950765010897, 0.2137970111714741, 0.215905247900254, 0.218019177224322, 0.2201381606574778, 0.2222615298072765, 0.2243885854782561, 0.2265185967622219, 0.2286508001163536, 0.2307843984299969, 0.232918560081096, 0.235052417983335, 0.2371850686251653, 0.2393155711020162, 0.2414429461431166, 0.2435661751344863, 0.2456841991397996, 0.247795917920977, 0.2499001889605156, 0.2519958264877416, 0.2540816005113432, 0.2561562358607268, 0.2582184112389405, 0.2602667582901074, 0.2622998606845322, 0.2643162532248701, 0.2663144209769762, 0.2682927984293098, 0.270249768685013, 0.2721836626910635, 0.2740927585091615, 0.2759752806333179, 0.2778293993593891, 0.2796532302121266, 0.281444833435618, 0.2832022135533243, 0.2849233190042547, 0.2866060418621612, 0.2882482176449846, 0.2898476252221441, 0.2914019868276185, 0.2929089681871373, 0.2943661787681662, 0.2957711721617429, 0.2971214466055844, 0.2984144456582598, 0.2996475590345736, 0.3008181236126705, 0.3019234246237038, 0.3029606970352474, 0.3039271271399416, 0.3048198543611514, 0.3056359732876889, 0.3063725359498818, 0.3070265543494776, 0.3075950032560323, 0.3080748232825495, 0.3084629242532002, 0.3087561888759563, 0.3089514767329121, 0.3090456286009265, 0.3090354711150031, 0.3089178217865109, 0.3086894943879338, 0.3083473047153149, 0.3078880767389076, 0.3073086491517677, 0.3066058823250893, 
0.3057766656780037, 0.3048179254682987, 0.3037266330090789, 0.3024998133147438, 0.3011345541778118, 0.2996280156760404, 0.2979774401069749, 0.2961801623444854, 0.2942336206090146, 0.292135367640133, 0.2898830822565842, 0.2874745812852729, 0.2849078318366082, 0.2821809638992391, 0.2792922832225057, 0.276240284449879, 0.2730236644612523, 0.2696413358761977, 0.2660924406642025, 0.2623763638014528, 0.2584927469069681, 0.2544415017837968, 0.25022282378361, 0.2458372049053836, 0.2412854465309862, 0.2365686716924399, 0.2316883367574211, 0.226646242411327, 0.2214445438059709, 0.2160857597368235, 0.2105727807027528, 0.2049088756945582, 0.1990976975513823, 0.1931432867174533, 0.1870500732257173, 0.1808228767299792, 0.1744669044033211, 0.1679877465181156, 0.161391369522042, 0.1546841064254877, 0.1478726443188171, 0.1409640088434991, 0.1339655454493934, 0.126884897281844, 0.1197299795570673, 0.1125089503029489, 0.1052301773651898, 0.09790220160615346, 0.09053369625613783, 0.08313342241450529, 0.07571018074155703, 0.06827275943151973, 0.06082987861291542, 0.05339013138513553, 0.04596192176947194, 0.03855339992934893, 0.03117239509806995, 0.02382634674305351, 0.01652223459308986, 0.009266508259282977, 0.00206501729061874, -0.005077057379245027, -0.0121552695171487, -0.01916597000719865, -0.02610636427329378, -0.03297456325593792, -0.03976962636820731, -0.04649159473645088, -0.05314151291936298, -0.05972143719770618, -0.06623442843989653, -0.07268452748022655, -0.07907671090116586, -0.08541682509426496, -0.09171149649114335, -0.09796801591289425, -0.1041941950894326, -0.1103981935563954, -0.1165883143532157, -0.1227727672288831, -0.1289593984186171, -0.1351553864921365, -0.1413669042985699, -0.147598747650211, -0.1538539321019007, -0.1601332599982946, -0.1664352136769181, -0.1727576707508916, -0.1790985683202201, -0.1854558108337941, -0.191827270849385, -0.1982107897458776, -0.2046041783764148, -0.2110052176510472, -0.2174116590376415, -0.2238212249702174, -0.2302316091546157, 
-0.2366404767624705, -0.2430454645058991, -0.2494441805871656, -0.2558342045198057, -0.2622130868203326, -0.2685783485726385, -0.2749274808705105, -0.2812579441472359, -0.2875671674049604, -0.2938525473601873, -0.3001114475254201, -0.3063411972503083, -0.3125390907486236, -0.3187023861398069, -0.3248283045355949, -0.3309140292032329, -0.3369567048369765, -0.3429534369689423, -0.3489012915489211, -0.3547972947205871, -0.3606384328187133, -0.3664216526086851, -0.3721438617859266, -0.3778019297489795, -0.3833926886560574, -0.3889129347710689, -0.3943594301014886, -0.3997289043271408, -0.4050180570160199, -0.4102235601207331, -0.4153420607470392, -0.4203701841842549, -0.425304537185993, -0.4301417114887398, -0.4348782875551354, -0.4395108385284411, -0.4440359343845106, -0.4484501462675916, -0.4527500509964119, -0.4569322357272353, -0.4609933027608495, -0.4649298744807683, -0.4687385984102571, -0.4724161523761104, -0.4759592497674215, -0.4793646448778577, -0.482629138320209, -0.4857495825021919, -0.4887228871526712, -0.4915460248876087, -0.4942160368051624, -0.4967300380994446, -0.4990852236824982, -0.5012788738040896, -0.5033083596589163, -0.5051711489708292, -0.5068648115436347, -0.5083870247680132, -0.5097355790740392, -0.5109083833187351, -0.511903470098031, -0.5127190009724424, -0.5133532715957078, -0.5138047167355715, -0.514071915175829, -0.5141535944886945, -0.5140486356664957, -0.5137560776016519, -0.5132751214038427, -0.512605134543246, -0.5117456548086897, -0.5106963940695471, -0.5094572418301954, -0.508028268565863, -0.5064097288287068, -0.5046020641129925, -0.5026059054682966, -0.5004220758497177, -0.4980515921941605, -0.4954956672118727, -0.4927557108825315, -0.4898333316453413, -0.4867303372727807, -0.4834487354178548, -0.4799907338249547, -0.4763587401947137, -0.4725553616935744, -0.4685834040991489, -0.4644458705728742, -0.4601459600519292, -0.4556870652529013, -0.4510727702802708, -0.4463068478334196, -0.4413932560065748, -0.43633613467687, 
-0.4311398014765479, -0.4258087473462416, -0.4203476316672629, -0.4147612769718873, -0.4090546632317749, -0.4032329217258792, -0.3973013284905031, -0.3912652973555308, -0.3851303725723192, -0.3789022210402537, -0.3725866241405668, -0.366189469187675, -0.3597167404963741, -0.3531745101483091, -0.3465689283310007, -0.3399062134118212, -0.3331926416924003, -0.3264345368796034, -0.3196382592961174, -0.3128101948555503, -0.3059567438288026, -0.2990843094302745, -0.2921992862542285, -0.2853080485933035, -0.2784169386727746, -0.2715322548356339, -0.2646602397149378, -0.2578070684310869, -0.2509788368527818, -0.2441815499612898, -0.2374211103583774, -0.2307033069587687, -0.2240338039082907, -0.2174181297689351, -0.2108616670119035, -0.2043696418592857, -0.1979471145143655, -0.1915989698196214, -0.1853299083803147, -0.1791444381901236, -0.1730468667935596, -0.1670412940180407, -0.1611316053061411, -0.1553214656762782, -0.149614314337398, -0.1440133599803114, -0.1385215767653309, -0.1331417010226376, -0.1278762286784391, -0.1227274134164862, -0.1176972655809234, -0.1127875518227794, -0.1079997954886896, -0.103335277746711, -0.09879503944036255, -0.09437988365834526, -0.09009037900377674, -0.08592686354325973, -0.08188944941270891, -0.07797802805361975, -0.0741922760503988, -0.07053166153651155, -0.06699545113456418, -0.06358271739303872, -0.06029234668026443, -0.0571230474943469, -0.05407335914620341, -0.05114166077157652, -0.04832618062692547, -0.04562500562343039, -0.04303609105298739, -0.04055727046002326, -0.03818626561320895, -0.0359206965316971, -0.03375809152133732, -0.03169589717742344, -0.02973148831188332, -0.02786217776441828, -0.02608522605891617, -0.02439785086848022, -0.02279723625461364, -0.02128054164845402, -0.0198449105444408, -0.01848747887939697, -0.01720538307269203, -0.01599576770590034, -0.01485579282315571, -0.0137826408362062, -0.01277352302096904, -0.01182568559515474, -0.01093641536925097, -0.01010304496581119, -0.009322957604563746, 
-0.008593591453328036, -0.007912443547080457, -0.007277073279742562, -0.006685105475355373, -0.006134233047248488, -0.005622219255602828, -0.005146899575435944, -0.004706183188504682, -0.004298054113919253, -0.003920571993394454, -0.003571872548028406, -0.003250167724298767, -0.002953745547603742, -0.002680969702154765, -0.002430278856354509, -0.002200185752973884, -0.001989276083481443, -0.001796207165787711, -0.001619706444428127, -0.001458569831997065, -0.001311659909926095, -0.001177904003561235, -0.001056292298860245, -0.0009458741511910084, -0.0008457665367579705, -0.0007551128232988381, -0.0006731669561342397, -0.0005992381944704354, -0.0005326484608256568, -0.0004727574313272394, -0.0004189714752396455, -0.0003707411178464742, -0.0003275585666763199, -0.0002889553055715211, -0.0002544997607233836, -0.0002237950422060982, -0.0001964767639823004, -0.0001722109448079137, -0.0001506919919355045, -0.0001316407690065774, -0.0001148027490364716, -9.994625293293959e-05, -8.686077355295362e-05, -7.535538489335472e-05, -6.525723563089953e-05, -5.641012587703571e-05, -4.867316569301852e-05, -4.191951362215559e-05, -3.603519323813977e-05, -3.091798548144563e-05, -2.647639435921686e-05, -2.262868341731874e-05, -1.930198025541903e-05, -1.643144624604425e-05, -1.395950853531488e-05, -1.183515134512248e-05, -1.001326356237526e-05, -8.454039589003725e-06, -7.122430434999134e-06, -5.987642064113041e-06, -5.022678046193669e-06, -4.203923629679446e-06, -3.510768420718218e-06, -2.925264939962717e-06, -2.431820422577969e-06, -2.016919329708844e-06, -1.668874148988379e-06, -1.377602176093166e-06, -1.134426087434088e-06, -9.318962344708535e-07, -7.636327116232294e-07, -6.241853711959125e-07, -5.089100791242858e-07, -4.138596237843883e-07, -3.356878058037665e-07, -2.715653490822151e-07, -2.191063815114888e-07, -1.763043377024151e-07, -1.41476235015473e-07, -1.132143680674624e-07, -9.034455545788945e-08, -7.189015560752506e-08, -5.704114628210737e-08, -4.512763460307972e-08, 
-3.559723119659518e-08, -2.799578372152907e-08, -2.195102150705885e-08, -1.715871461392218e-08, -1.337099753071534e-08, -1.038655016768089e-08, -8.042367069662322e-09, -6.206880100510954e-09, -4.774230518701712e-09, -3.659513639590201e-09, -2.794843392885836e-09, -2.126105438372677e-09, -1.610285544099596e-09, -1.213279021940967e-09, -9.080863566659195e-10, -6.733773791829873e-10, -4.921647004054244e-10, -3.51876221811061e-10, -2.375975156699715e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "2S", + "angular_momentum": 0 + }, + { + "index": 2, + "radial_function": [ + 3.5781824562945e-08, 3.668764573427693e-08, 3.761639787619333e-08, 3.856866148889923e-08, 3.954503176802839e-08, 4.054611897665938e-08, 4.157254882674968e-08, 4.262496287022578e-08, 4.370401889997335e-08, 4.481039136097936e-08, 4.594477177188121e-08, 4.710786915718829e-08, 4.830041049044475e-08, 4.95231411486113e-08, 5.077682537794981e-08, 5.206224677170107e-08, 5.338020875985585e-08, 5.473153511132436e-08, 5.611707044881765e-08, 5.753768077676427e-08, 5.899425402259001e-08, 6.048770059170096e-08, 6.201895393651585e-08, 6.358897113990343e-08, 6.519873351338976e-08, 6.684924721050868e-08, 6.854154385567989e-08, 7.027668118900677e-08, 7.205574372739747e-08, 7.387984344242243e-08, 7.575012045533128e-08, 7.766774374966499e-08, 7.96339119019073e-08, 8.164985383063268e-08, 8.371682956461972e-08, 8.583613103040785e-08, 8.800908285979271e-08, 9.023704321776226e-08, 9.252140465139259e-08, 9.486359496023402e-08, 9.726507808872952e-08, 9.972735504122654e-08, 1.022519648201513e-07, 1.048404853879333e-07, 1.074945346532819e-07, 1.102157714824282e-07, 1.130058967359689e-07, 1.158666543319573e-07, 1.187998323359053e-07, 1.21807264078381e-07, 1.248908293008956e-07, 1.280524553308014e-07, 
1.312941182859298e-07, 1.346178443097261e-07, 1.380257108376528e-07, 1.415198478956485e-07, 1.451024394314613e-07, 1.487757246796821e-07, 1.525419995613346e-07, 1.564036181188978e-07, 1.603629939876523e-07, 1.644226019042765e-07, 1.685849792536329e-07, 1.728527276547098e-07, 1.772285145867129e-07, 1.817150750563196e-07, 1.86315213307141e-07, 1.910318045724588e-07, 1.958677968723318e-07, 2.008262128561986e-07, 2.059101516921216e-07, 2.111227910038613e-07, 2.164673888569854e-07, 2.219472857952554e-07, 2.275659069285683e-07, 2.333267640737489e-07, 2.392334579495396e-07, 2.452896804271565e-07, 2.514992168378159e-07, 2.578659483386792e-07, 2.643938543386862e-07, 2.710870149858044e-07, 2.779496137172381e-07, 2.849859398741982e-07, 2.922003913828652e-07, 2.995974775032169e-07, 3.071818216474471e-07, 3.149581642697293e-07, 3.229313658291352e-07, 3.311064098275612e-07, 3.394884059245547e-07, 3.480825931309965e-07, 3.568943430836299e-07, 3.65929163402482e-07, 3.751927011332817e-07, 3.846907462770156e-07, 3.944292354088414e-07, 4.044142553886077e-07, 4.146520471653085e-07, 4.251490096778471e-07, 4.359117038545416e-07, 4.469468567138852e-07, 4.582613655691102e-07, 4.698623023391915e-07, 4.817569179689879e-07, 4.939526469612647e-07, 5.064571120234584e-07, 5.192781288320631e-07, 5.324237109176309e-07, 5.459020746734361e-07, 5.597216444909244e-07, 5.738910580251734e-07, 5.884191715936434e-07, 6.033150657115947e-07, 6.185880507676374e-07, 6.342476728429457e-07, 6.503037196777945e-07, 6.667662267891318e-07, 6.836454837430149e-07, 7.009520405858358e-07, 7.186967144383404e-07, 7.368905962565862e-07, 7.555450577640378e-07, 7.746717585591564e-07, 7.942826534029114e-07, 8.143899996907651e-07, 8.350063651138181e-07, 8.561446355138864e-07, 8.778180229374269e-07, 9.000400738933506e-07, 9.22824677819863e-07, 9.46186075765654e-07, 9.701388692908388e-07, 9.946980295932203e-07, 1.01987890686559e-06, 1.045697239889883e-06, 1.072169165874214e-06, 1.09931123053894e-06, 1.127140398458027e-06, 
1.155674063662222e-06, 1.184930060510611e-06, 1.21492667483741e-06, 1.24568265538091e-06, 1.277217225501744e-06, 1.309550095197797e-06, 1.34270147342325e-06, 1.376692080719486e-06, 1.411543162165729e-06, 1.447276500657514e-06, 1.483914430521302e-06, 1.521479851473717e-06, 1.559996242934166e-06, 1.59948767869976e-06, 1.639978841991711e-06, 1.681495040882639e-06, 1.724062224114358e-06, 1.767706997316115e-06, 1.812456639633361e-06, 1.858339120777447e-06, 1.905383118506942e-06, 1.95361803655143e-06, 2.003074022989064e-06, 2.053781989089297e-06, 2.105773628632602e-06, 2.159081437719261e-06, 2.213738735079528e-06, 2.269779682898007e-06, 2.327239308165075e-06, 2.386153524568837e-06, 2.44655915494125e-06, 2.508493954272369e-06, 2.571996633307261e-06, 2.637106882740132e-06, 2.703865398020956e-06, 2.772313904790034e-06, 2.842495184956305e-06, 2.914453103435926e-06, 2.988232635567527e-06, 3.063879895221542e-06, 3.141442163621038e-06, 3.220967918892026e-06, 3.302506866361904e-06, 3.386109969624688e-06, 3.47182948239269e-06, 3.559718981154443e-06, 3.649833398659179e-06, 3.742229058249063e-06, 3.836963709060281e-06, 3.934096562115263e-06, 4.033688327328477e-06, 4.135801251448827e-06, 4.240499156962627e-06, 4.347847481981114e-06, 4.457913321137708e-06, 4.570765467520463e-06, 4.686474455665788e-06, 4.805112605640612e-06, 4.926754068240155e-06, 5.051474871329831e-06, 5.179352967360179e-06, 5.310468282084268e-06, 5.444902764508501e-06, 5.582740438107459e-06, 5.724067453335216e-06, 5.868972141465775e-06, 6.017545069796087e-06, 6.169879098246582e-06, 6.326069437394042e-06, 6.486213707973468e-06, 6.650412001885986e-06, 6.818766944750712e-06, 6.991383760040115e-06, 7.168370334838373e-06, 7.349837287264229e-06, 7.535898035600353e-06, 7.72666886917213e-06, 7.922269021020714e-06, 8.122820742415058e-06, 8.32844937925001e-06, 8.539283450377979e-06, 8.755454727922883e-06, 8.977098319627158e-06, 9.204352753282444e-06, 9.437360063297369e-06, 9.676265879456205e-06, 9.921219517923626e-06, 
1.017237407455301e-05, 1.042988652055582e-05, 1.069391780059233e-05, 1.096463293334489e-05, 1.124220111463607e-05, 1.152679582315698e-05, 1.181859492887072e-05, 1.211778080415948e-05, 1.242454043778442e-05, 1.273906555172909e-05, 1.306155272100031e-05, 1.339220349646011e-05, 1.373122453076637e-05, 1.407882770750043e-05, 1.4435230273562e-05, 1.480065497491484e-05, 1.517533019576684e-05, 1.555949010127241e-05, 1.59533747838457e-05, 1.635723041317596e-05, 1.67713093900391e-05, 1.719587050400093e-05, 1.763117909511086e-05, 1.807750721968701e-05, 1.85351338202958e-05, 1.900434490003266e-05, 1.948543370121208e-05, 1.997870088857892e-05, 2.048445473715517e-05, 2.100301132483918e-05, 2.153469472987794e-05, 2.207983723333517e-05, 2.263877952668186e-05, 2.321187092463882e-05, 2.379946958340347e-05, 2.440194272439789e-05, 2.501966686367676e-05, 2.565302804713913e-05, 2.63024220916901e-05, 2.696825483250307e-05, 2.765094237653693e-05, 2.835091136246584e-05, 2.906859922718418e-05, 2.980445447905264e-05, 3.055893697805552e-05, 3.133251822304465e-05, 3.212568164624813e-05, 3.293892291522822e-05, 3.377275024247612e-05, 3.462768470283687e-05, 3.550426055896221e-05, 3.640302559499389e-05, 3.732454145868586e-05, 3.826938401217841e-05, 3.923814369164224e-05, 4.023142587601761e-05, 4.124985126507714e-05, 4.229405626704854e-05, 4.336469339603871e-05, 4.446243167950574e-05, 4.558795707603444e-05, 4.674197290367326e-05, 4.792520027910123e-05, 4.913837856789756e-05, 5.038226584619365e-05, 5.165763937399628e-05, 5.296529608047482e-05, 5.430605306151616e-05, 5.568074808985597e-05, 5.709024013810352e-05, 5.853540991498646e-05, 6.001716041514794e-05, 6.153641748283902e-05, 6.309413038985632e-05, 6.469127242808426e-05, 6.632884151701092e-05, 6.800786082659423e-05, 6.972937941586652e-05, 7.149447288767394e-05, 7.330424405995714e-05, 7.515982365399137e-05, 7.706237100001221e-05, 7.901307476066631e-05, 8.101315367273595e-05, 8.306385730759739e-05, 8.516646685088647e-05, 8.732229590185387e-05, 
8.953269129290692e-05, 9.179903392984698e-05, 9.412273965332157e-05, 9.650526012202864e-05, 9.894808371821722e-05, 0.0001014527364760488, 0.0001040207830333933, 0.0001066538276076494, 0.0001093535149961948, 0.0001121215316020854, 0.0001149596064856382, 0.0001178695124425504, 0.0001208530671092218, 0.0001239121340959647, 0.0001270486241487835, 0.0001302644963405149, 0.0001335617592919342, 0.000136942472423729, 0.0001404087472399813, 0.0001439627486440888, 0.0001476066962877821, 0.000151342865954231, 0.0001551735909759425, 0.0001591012636884657, 0.0001631283369206601, 0.0001672573255226058, 0.0001714908079319196, 0.0001758314277796207, 0.0001802818955363382, 0.0001848449902001387, 0.0001895235610266896, 0.0001943205293031163, 0.0001992388901664784, 0.000204281714467962, 0.0002094521506841264, 0.0002147534268761546, 0.0002201888526983833, 0.0002257618214574478, 0.0002314758122231912, 0.0002373343919926259, 0.0002433412179083766, 0.0002495000395328271, 0.000255814701179541, 0.0002622891443031348, 0.0002689274099493504, 0.0002757336412665737, 0.000282712086080495, 0.0002898670995334565, 0.0002972031467899982, 0.0003047248058103373, 0.0003124367701935802, 0.0003203438520920363, 0.0003284509851989297, 0.0003367632278108398, 0.0003452857659669751, 0.0003540239166672492, 0.0003629831311708926, 0.0003721689983777333, 0.0003815872482942962, 0.0003912437555865154, 0.0004011445432215905, 0.000411295786200847, 0.0004217038153860645, 0.000432375121421542, 0.000443316358754201, 0.0004545343497541781, 0.0004660360889384606, 0.000477828747299953, 0.0004899196767447487, 0.0005023164146401324, 0.0005150266884760552, 0.0005280584206429977, 0.0005414197333288541, 0.0005551189535380559, 0.0005691646182355684, 0.0005835654796191834, 0.0005983305105230126, 0.0006134689099554059, 0.0006289901087749034, 0.0006449037755065614, 0.0006612198223043201, 0.0006779484110595631, 0.0006950999596629736, 0.0007126851484208387, 0.0007307149266311691, 0.0007492005193219549, 0.0007681534341576293, 
0.0007875854685153285, 0.0008075087167370371, 0.0008279355775608593, 0.0008488787617361138, 0.0008703512998264235, 0.0008923665502057322, 0.0009149382072511119, 0.0009380803097376913, 0.0009618072494415614, 0.0009861337799525313, 0.001011075025705154, 0.001036646491231532, 0.001062864070641011, 0.001089744057333161, 0.001117303153948414, 0.001145558482563062, 0.001174527595133571, 0.001204228484196563, 0.001234679593830322, 0.001265899830884311, 0.00129790857648185, 0.001330725697803991, 0.001364371560159599, 0.001398867039350027, 0.001434233534332933, 0.001470492980194919, 0.001507667861437163, 0.001545781225584258, 0.001584856697120688, 0.00162491849176577, 0.001665991431091149, 0.001708100957492703, 0.001751273149519974, 0.001795534737576735, 0.001840913119996969, 0.001887436379505696, 0.001935133300075092, 0.001984033384182201, 0.00203416687047865, 0.002085564751881396, 0.00213825879409352, 0.002192281554564297, 0.002247666401899033, 0.002304447535727798, 0.002362660007042809, 0.002422339739016274, 0.002483523548305754, 0.002546249166861346, 0.002610555264242594, 0.002676481470457378, 0.002744068399332953, 0.002813357672430934, 0.002884391943516965, 0.002957214923597401, 0.003031871406533525, 0.003108407295246219, 0.003186869628522319, 0.003267306608435528, 0.00334976762839281, 0.003434303301820314, 0.003520965491500546, 0.003609807339573268, 0.003700883298213997, 0.003794249161000964, 0.003889962094987393, 0.003988080673487323, 0.004088664909592813, 0.004191776290432788, 0.004297477812189437, 0.004405834015883141, 0.00451691102394191, 0.004630776577567681, 0.004747500074913591, 0.004867152610085812, 0.004989807012983939, 0.005115537889993168, 0.005244421665542614, 0.005376536624543735, 0.005511962955720556, 0.005650782795847537, 0.005793080274908036, 0.005938941562184468, 0.006088454913296849, 0.00624171071819966, 0.006398801550151472, 0.006559822215668988, 0.006724869805478189, 0.006894043746474532, 0.007067445854702948, 0.007245180389369524, 
0.007427354107895922, 0.007614076322025645, 0.007805458954992472, 0.008001616599760376, 0.008202666578343423, 0.008408729002212258, 0.008619926833796272, 0.008836385949085757, 0.009058235201340303, 0.009285606485908406, 0.009518634806159661, 0.009757458340534101, 0.01000221851070835, 0.01025306005087825, 0.01051013107815816, 0.01077358316409033, 0.01104357140726451, 0.01132025450703653, 0.01160379483834148, 0.01189435852758988, 0.01219211552963428, 0.01249723970579355, 0.01280990890291743, 0.01313030503347138, 0.01345861415662283, 0.01379502656030116, 0.01413973684420718, 0.01449294400373918, 0.01485485151480352, 0.01522566741947166, 0.01560560441244331, 0.01599487992827037, 0.01639371622929344, 0.01680234049423703, 0.01722098490740554, 0.01764988674841806, 0.01808928848241415, 0.0185394378506557, 0.01900058796144888, 0.01947299738129874, 0.01995693022620726, 0.02045265625301636, 0.02096045095069018, 0.02148059563142786, 0.02201337752148333, 0.02255908985156811, 0.02311803194669849, 0.0236905093153439, 0.02427683373772186, 0.02487732335307525, 0.0254923027457598, 0.02612210302995518, 0.02676706193280501, 0.02742752387578024, 0.02810384005404357, 0.02879636851358618, 0.02950547422588922, 0.03023152915985384, 0.03097491235072377, 0.03173600996571556, 0.03251521536605031, 0.03331292916506999, 0.03412955928209826, 0.03496552099169628, 0.03582123696793835, 0.03669713732331824, 0.03759365964187515, 0.03851124900611061, 0.03945035801724268, 0.04041144680832755, 0.0413949830497511, 0.04240144194657505, 0.04343130622719214, 0.04448506612272917, 0.04556321933660224, 0.04666627100360957, 0.04779473363791765, 0.04894912706926888, 0.05012997836671347, 0.05133782174913685, 0.05257319848182727, 0.05383665675829623, 0.05512875156653643, 0.0564500445388689, 0.05780110378450183, 0.05918250370389089, 0.06059482478395964, 0.06203865337320399, 0.0635145814356777, 0.06502320628281914, 0.06656513028204629, 0.06814096054102226, 0.06975130856645004, 0.07139678989623821, 0.0730780237038346, 
0.0747956323735048, 0.07655024104530059, 0.07834247712843187, 0.08017296978173748, 0.08204234935991715, 0.08395124682416995, 0.08590029311586139, 0.08789011849182427, 0.08992135181988348, 0.09199461983318337, 0.09411054634188641, 0.0962697514008112, 0.09847285043157333, 0.1007204532978041, 0.1030131633320276, 0.105351576312796, 0.1077362793907079, 0.1101678499619598, 0.1126468544881276, 0.1151738472609141, 0.1177493691106609, 0.120373946057486, 0.1230480879039844, 0.1257722867685251, 0.1285470155582693, 0.1313727263811565, 0.1342498488962311, 0.1371787886018261, 0.1401599250612831, 0.1431936100660629, 0.1462801657362988, 0.1494198825590591, 0.1526130173648193, 0.155859791242907, 0.159160387396954, 0.1625149489417012, 0.1659235766428226, 0.1693863266017926, 0.1729032078881989, 0.1764741801223084, 0.1800991510111327, 0.1837779738416993, 0.1875104449357291, 0.1912963010704485, 0.1951352168708101, 0.1990268021789865, 0.2029705994076112, 0.2069660808838846, 0.211012646192339, 0.2151096195247509, 0.2192562470464209, 0.2234516942887911, 0.2276950435791409, 0.2319852915189021, 0.2363213465229366, 0.2407020264329454, 0.2451260562190057, 0.2495920657840556, 0.2540985878869874, 0.2586440562007998, 0.2632268035230656, 0.2678450601567351, 0.2724969524799911, 0.2771805017245711, 0.281893622982535, 0.2866341244619974, 0.2913997070127592, 0.2961879639430525, 0.3009963811488134, 0.3058223375768714, 0.3106631060432865, 0.3155158544276928, 0.32037764726386, 0.3252454477458538, 0.3301161201679556, 0.3349864328150375, 0.3398530613182182, 0.3447125924883461, 0.3495615286371956, 0.3543962923930476, 0.3592132320136608, 0.3640086271953871, 0.3687786953723058, 0.373519598493795, 0.3782274502627366, 0.3828983238096649, 0.3875282597704968, 0.3921132747269858, 0.3966493699597682, 0.4011325404536747, 0.4055587840839535, 0.4099241109001222, 0.4142245524113102, 0.4184561707632746, 0.4226150676826604, 0.4266973930486954, 0.4306993529363403, 0.4346172169580265, 0.4384473247136998, 
0.4421860911409406, 0.4458300105387346, 0.4493756590201439, 0.4528196951309011, 0.456158858353163, 0.4593899651965248, 0.462509902562371, 0.4655156180530899, 0.4684041068850712, 0.4711723950543243, 0.4738175183965128, 0.4763364971799391, 0.4787263058711839, 0.4809838377195266, 0.4831058638188163, 0.4850889863249938, 0.4869295855350059, 0.4886237605694157, 0.4901672634476491, 0.4915554264026942, 0.4927830823522361, 0.4938444785268851, 0.4947331833543885, 0.4954419868126093, 0.4959627945946122, 0.4962865165772846, 0.4964029502512967, 0.4963014225819783, 0.4959733990313997, 0.4954109094180317, 0.4946058996042733, 0.4935502420466203, 0.4922357470863433, 0.4906541750029206, 0.4887972488536257, 0.4866566681236725, 0.4842241232121725, 0.4814913107798351, 0.4784499499848498, 0.475091799633713, 0.4714086762739028, 0.4673924732552476, 0.4630351807865717, 0.458328907013735, 0.4532659001444829, 0.4478385716446008, 0.4420395205286827, 0.4358615587673947, 0.4292977378313919, 0.4223413763900427, 0.4149860891807867, 0.4072258170623047, 0.3990548582616663, 0.3904679008222415, 0.3814600562553869, 0.3720268943947148, 0.3621644794471164, 0.3518694072295996, 0.3411388435754006, 0.3299705638867124, 0.3183629938047132, 0.3063152509603565, 0.2938271877615727, 0.2808994351641104, 0.2675334473641951, 0.2537315473414775, 0.2394969731703745, 0.2248339250068506, 0.2097476126459344, 0.194244303532813, 0.1783313710971733, 0.162017343266583, 0.1453119510001048, 0.1282261766680466, 0.1107723020877564, 0.09296395600871163, 0.07481616082284612, 0.05634537825812863, 0.03756955379491193, 0.01850815952554044, -0.000817764841794279, -0.02038557315386689, -0.04017097649705425, -0.06014800975543047, -0.0802890014225896, -0.1005645470681388, -0.1209434868804022, -0.1413928877272656, -0.1618780301971282, -0.1823624011014565, -0.2028076919392859, -0.2231738038419974, -0.2434188595336235, -0.2634992228575814, -0.2833695264348797, -0.3029827080312371, -0.3222900562209464, -0.3412412659434242, 
-0.3597845045539265, -0.3778664889725563, -0.3954325745351476, -0.4124268561455213, -0.4287922823205815, -0.4444707827074426, -0.4594034096347959, -0.4735304942386555, -0.4867918176750149, -0.4991267978983611, -0.5104746924449464, -0.5207748176127277, -0.529966784375429, -0.5379907513057498, -0.5447876947117647, -0.5502996961104939, -0.5544702470728674, -0.5572445713742876, -0.5585699642740934, -0.5583961486248352, -0.5566756473777796, -0.5533641719038438, -0.5484210253886245, -0.5418095203857399, -0.5334974094237869, -0.5234573273583131, -0.5116672439408488, -0.4981109248418368, -0.4827783991129472, -0.4656664308065557, -0.4467789921859787, -0.4261277356598452, -0.4037324612576532, -0.379621576131388, -0.3538325422222548, -0.3264123078715226, -0.2974177187829641, -0.2669159033628413, -0.2349846270744705, -0.2017126100510691, -0.1671998018165434, -0.1315576065734313, -0.09490905213552743, -0.05738889521574608, -0.01914365543443479, 0.01966842990246807, 0.05887753848334679, 0.09830249158549369, 0.1377510872097041, 0.1770205464764418, 0.2158980820149944, 0.2541615969853891, 0.2915805231632611, 0.3279168061726883, 0.362926045452416, 0.3963587958670077, 0.4279620370055654, 0.4574808151253759, 0.484660061374472, 0.5092465883439636, 0.5309912651369452, 0.5496513689756795, 0.5649931088842038, 0.5767943141635473, 0.5848472772088208, 0.5889617366933213, 0.5889679832617741, 0.5847200656369522, 0.5760990704633563, 0.5630164443102194, 0.5454173210668603, 0.523283812532313, 0.4966382143887762, 0.4655460740317063, 0.4301190610033632, 0.3905175751559944, 0.3469530222914716, 0.2996896820442051, 0.2490460883748626, 0.1953958394285999, 0.1391677509112344, 0.08084526580036537, 0.02096503340654016, -0.03988542717981532, -0.1010710600757215, -0.1619136762708536, -0.2216974928971921, -0.279675860672447, -0.3350792164098786, -0.3871242782448621, -0.4350244782231377, -0.4780015999954254, -0.5152985585214758, -0.5461932239636458, -0.570013153534863, -0.5861510533004717, 
-0.5940807473370895, -0.5933733849441764, -0.5837135687282292, -0.5649150385201859, -0.5369354996852255, -0.4998901411474397, -0.4540633503510662, -0.3999181015531976, -0.338102472974718, -0.2694527407806831, -0.1949953280006498, -0.1159464641614813, -0.0336862701113246, 0.05026867491232625, 0.1342558549216186, 0.2165080750847486, 0.2951824435103374, 0.3683996198893282, 0.4342879524649733, 0.4910325175575206, 0.5369282010265247, 0.570435708062944, 0.590239111739476, 0.595303269770399, 0.5849291721733889, 0.558805043260823, 0.517050825822253, 0.4602535412243284, 0.389490965270454, 0.3063411052197794, 0.2128751269861044, 0.1116316801193784, 0.005571015517078206, -0.1019921039352164, -0.2074769336506771, -0.3071460311296255, -0.397233544448488, -0.4740895819232734, -0.5343364908848872, -0.5750316043003294, -0.5938297909744389, -0.5891380480778737, -0.5602534945979631, -0.5074755596208619, -0.432183011835956, -0.336866844851766, -0.2251110044237336, -0.1015145860142113, 0.02844851856564914, 0.1586334921451396, 0.2824892440579861, 0.3933768697377982, 0.4849303592250651, 0.5514433151735487, 0.5882609885052402, 0.5921530986851359, 0.5616401023072252, 0.4972442532349746, 0.4016373889154147, 0.2796602510561674, 0.138193566887528, -0.01413081946081626, -0.1673798808863442, -0.3109187696907185, -0.4341337309976627, -0.5272379365745713, -0.5821056078697898, -0.5930680507189324, -0.5575973740471305, -0.4768013536933642, -0.3556575921462567, -0.2029278826818338, -0.03071495422422015, 0.1463468543008356, 0.3122394006881366, 0.4509793033494314, 0.5481990143661589, 0.5927459139157175, 0.5781282881488422, 0.5036254015465607, 0.3748870360286959, 0.2038792329569057, 0.008088639080731123, -0.1910241615490186, -0.3702372828550798, -0.5072591942205531, -0.5836162133342584, -0.5873773424428963, -0.5153194993544745, -0.3741487266687449, -0.180466020029507, 0.04069796464873946, 0.2587647934935294, 0.4416665151590923, 0.5606092903488885, 0.5948743342973646, 0.5358673819410635, 
0.3896226020528837, 0.1771041478105, -0.06806541442979486, -0.3044074469173482, -0.4893909077190348, -0.5872287441150021, -0.5762882774793028, -0.4545788832158946, -0.2419168528586162, 0.02216496280946916, 0.2849271293354954, 0.4905591941123072, 0.5921819940938632, 0.5632192595465502, 0.4053634247286771, 0.1507192113885644, -0.1432448397346297, -0.4056936046613245, -0.5692579334594415, -0.5879233773114214, -0.4512551986309475, -0.1904631475990176, 0.1266040678376505, 0.4116676674413486, 0.5801907427125577, 0.5772435826088568, 0.3971750950896978, 0.0898604291536929, -0.2504772507554465, -0.51261605023188, -0.604733292937672, -0.488262154920263, -0.1965463520604796, 0.1708511943132507, 0.4795548926172398, 0.6089830855576926, 0.5012302272245173, 0.1902490664759976, -0.2037847472962479, -0.5175969457022102, -0.6119380490294066, -0.4359595667025157 + ], + "label": "2P", + "angular_momentum": 1 + }, + { + "index": 3, + "radial_function": [ + 6.07196669200998e-08, 6.225679253278885e-08, 6.383283066185093e-08, 6.544876638213438e-08, 6.710560970576642e-08, 6.880439621344232e-08, 7.054618770169642e-08, 7.233207284655907e-08, 7.4163167884013e-08, 7.604061730767721e-08, 7.796559458415083e-08, 7.993930288646736e-08, 8.196297584611582e-08, 8.403787832409944e-08, 8.616530720151433e-08, 8.834659219014004e-08, 9.058309666355231e-08, 9.287621850927422e-08, 9.522739100249983e-08, 9.763808370193695e-08, 1.001098033683267e-07, 1.026440949062169e-07, 1.052425423295756e-07, 1.0790676975185e-07, 1.106384424010881e-07, 1.134392676607579e-07, 1.163109961369161e-07, 1.192554227523912e-07, 1.222743878686667e-07, 1.253697784361645e-07, 1.285435291736468e-07, 1.317976237774766e-07, 1.351340961614893e-07, 1.385550317282513e-07, 1.420625686725027e-07, 1.45658899317594e-07, 1.493462714857562e-07, 1.531269899030596e-07, 1.570034176399381e-07, 1.609779775881822e-07, 1.650531539753194e-07, 1.692314939173334e-07, 1.735156090106899e-07, 1.779081769646637e-07, 1.824119432749904e-07, 
1.870297229398831e-07, 1.917644022194931e-07, 1.966189404399101e-07, 2.015963718428309e-07, 2.066998074820531e-07, 2.119324371679762e-07, 2.172975314613312e-07, 2.227984437173782e-07, 2.28438612181855e-07, 2.342215621399849e-07, 2.401509081198832e-07, 2.462303561517468e-07, 2.524637060842337e-07, 2.588548539594803e-07, 2.654077944482453e-07, 2.721266233466949e-07, 2.790155401363982e-07, 2.860788506091254e-07, 2.933209695580946e-07, 3.007464235373485e-07, 3.083598536909802e-07, 3.161660186539851e-07, 3.241697975265457e-07, 3.323761929236094e-07, 3.4079033410167e-07, 3.494174801646971e-07, 3.58263023351229e-07, 3.673324924046752e-07, 3.766315560289377e-07, 3.861660264315147e-07, 3.959418629562905e-07, 4.059651758082954e-07, 4.162422298727549e-07, 4.267794486308176e-07, 4.375834181744116e-07, 4.486608913227298e-07, 4.600187918429329e-07, 4.716642187776901e-07, 4.836044508822768e-07, 4.958469511739951e-07, 5.083993715967587e-07, 5.212695578037658e-07, 5.344655540612413e-07, 5.479956082763172e-07, 5.618681771521962e-07, 5.760919314738071e-07, 5.906757615272773e-07, 6.056287826565911e-07, 6.209603409609138e-07, 6.366800191361477e-07, 6.527976424643549e-07, 6.693232849548121e-07, 6.862672756405164e-07, 7.036402050340894e-07, 7.21452931747113e-07, 7.397165892770202e-07, 7.584425929658091e-07, 7.776426471349029e-07, 7.973287524006311e-07, 8.175132131749044e-07, 8.382086453557511e-07, 8.594279842125548e-07, 8.811844924708898e-07, 9.034917686020293e-07, 9.263637553222995e-07, 9.498147483075788e-07, 9.738594051284154e-07, 9.985127544113274e-07, 1.023790205232014e-06, 1.049707556746359e-06, 1.076281008065229e-06, 1.103527168379249e-06, 1.13146306733989e-06, 1.160106165703334e-06, 1.189474366243801e-06, 1.219586024943115e-06, 1.250459962463551e-06, 1.282115475911095e-06, 1.314572350896492e-06, 1.347850873901607e-06, 1.381971844958829e-06, 1.416956590651453e-06, 1.45282697744315e-06, 1.489605425344865e-06, 1.527314921927692e-06, 1.565979036690447e-06, 1.605621935790976e-06, 
1.64626839715035e-06, 1.687943825939421e-06, 1.730674270457415e-06, 1.774486438412449e-06, 1.819407713614196e-06, 1.865466173089114e-06, 1.912690604628899e-06, 1.961110524783209e-06, 2.010756197307796e-06, 2.061658652079675e-06, 2.113849704491099e-06, 2.167361975334457e-06, 2.222228911190562e-06, 2.278484805333007e-06, 2.33616481916172e-06, 2.395305004179069e-06, 2.455942324522257e-06, 2.518114680066124e-06, 2.581860930110709e-06, 2.64722091766849e-06, 2.714235494366376e-06, 2.782946545978068e-06, 2.853397018602764e-06, 2.925630945506466e-06, 2.999693474642794e-06, 3.075630896870426e-06, 3.153490674884793e-06, 3.23332147288217e-06, 3.315173186974596e-06, 3.399096976374765e-06, 3.485145295370246e-06, 3.573371926107086e-06, 3.663832012203302e-06, 3.756582093213118e-06, 3.851680139963752e-06, 3.949185590786522e-06, 4.04915938866515e-06, 4.151664019324402e-06, 4.256763550282731e-06, 4.364523670893633e-06, 4.475011733400366e-06, 4.58829679502997e-06, 4.704449661152796e-06, 4.823542929534362e-06, 4.945651035707569e-06, 5.070850299493116e-06, 5.199218972697623e-06, 5.330837288019073e-06, 5.465787509189984e-06, 5.604153982390082e-06, 5.746023188960017e-06, 5.891483799449558e-06, 6.040626729033792e-06, 6.193545194331846e-06, 6.350334771664048e-06, 6.511093456783367e-06, 6.675921726118902e-06, 6.844922599569503e-06, 7.018201704886604e-06, 7.195867343686923e-06, 7.378030559135704e-06, 7.564805205343246e-06, 7.756308018517922e-06, 7.952658689919875e-06, 8.153979940661575e-06, 8.360397598401226e-06, 8.572040675977506e-06, 8.789041452034581e-06, 9.011535553687425e-06, 9.239662041279864e-06, 9.473563495287316e-06, 9.713386105419302e-06, 9.95927976197705e-06, 1.021139814952303e-05, 1.046989884292159e-05, 1.073494340580987e-05, 1.100669749156108e-05, 1.128533094680311e-05, 1.157101791755664e-05, 1.186393695805996e-05, 1.216427114234741e-05, 1.247220817865194e-05, 1.278794052670291e-05, 1.311166551799209e-05, 1.344358547908381e-05, 1.37839078580452e-05, 1.413284535407623e-05, 
1.449061605042032e-05, 1.48574435506381e-05, 1.523355711833032e-05, 1.561919182039591e-05, 1.60145886739158e-05, 1.641999479675357e-05, 1.683566356196679e-05, 1.726185475612642e-05, 1.769883474164162e-05, 1.814687662319269e-05, 1.860626041837543e-05, 1.907727323266292e-05, 1.956020943879541e-05, 2.005537086070845e-05, 2.056306696211548e-05, 2.108361503986221e-05, 2.161734042217265e-05, 2.216457667191212e-05, 2.27256657949924e-05, 2.330095845405023e-05, 2.389081418753219e-05, 2.449560163432201e-05, 2.511569876405207e-05, 2.575149311324065e-05, 2.640338202740422e-05, 2.707177290929464e-05, 2.775708347341639e-05, 2.845974200698301e-05, 2.918018763747491e-05, 2.991887060696585e-05, 3.06762525533893e-05, 3.145280679891948e-05, 3.224901864564788e-05, 3.306538567873865e-05, 3.390241807725264e-05, 3.476063893283374e-05, 3.564058457645586e-05, 3.654280491343489e-05, 3.746786376691379e-05, 3.841633923003569e-05, 3.938882402702406e-05, 4.038592588339482e-05, 4.140826790553198e-05, 4.245648896986229e-05, 4.353124412187231e-05, 4.463320498521627e-05, 4.576306018116913e-05, 4.692151575868719e-05, 4.81092956353431e-05, 4.932714204941074e-05, 5.05758160233814e-05, 5.185609783919929e-05, 5.31687875255137e-05, 5.451470535724956e-05, 5.589469236780904e-05, 5.730961087422205e-05, 5.876034501557286e-05, 6.024780130503835e-05, 6.177290919588078e-05, 6.333662166174787e-05, 6.493991579164155e-05, 6.658379339992442e-05, 6.826928165174505e-05, 6.999743370426949e-05, 7.176932936411925e-05, 7.358607576142403e-05, 7.544880804090764e-05, 7.735869007043886e-05, 7.931691516748475e-05, 8.132470684392073e-05, 8.33833195696593e-05, 8.549403955557044e-05, 8.765818555618357e-05, 8.987710969266558e-05, 9.215219829658965e-05, 9.448487277501705e-05, 9.687659049742903e-05, 9.932884570506039e-05, 0.0001018431704431979, 0.0001044211355170239, 0.0001070643514715961, 0.0001097744695965733, 0.0001125531829563091, 0.0001154022274459532, 0.0001183233828742139, 0.0001213184740734554, 0.0001243893720378142, 
0.0001275379950900437, 0.0001307663100778068, 0.0001340763336001596, 0.0001374701332649841, 0.000140949828978149, 0.0001445175942651967, 0.0001481756576263738, 0.0001519263039258432, 0.0001557718758159323, 0.000159714775197325, 0.0001637574647160561, 0.0001679024692982149, 0.0001721523777234753, 0.0001765098442381168, 0.0001809775902088945, 0.0001855584058184258, 0.0001902551518034407, 0.0001950707612367253, 0.0002000082413540987, 0.000205070675427273, 0.0002102612246839896, 0.000215583130276514, 0.0002210397152996472, 0.0002266343868596408, 0.0002323706381951147, 0.0002382520508514444, 0.0002442822969098561, 0.0002504651412725724, 0.0002568044440055558, 0.0002633041627400942, 0.0002699683551349356, 0.0002768011814001823, 0.0002838069068847562, 0.0002909899047289772, 0.000298354658583693, 0.0003059057653978272, 0.0003136479382759928, 0.0003215860094078797, 0.0003297249330712045, 0.0003380697887101836, 0.0003466257840911575, 0.0003553982585375784, 0.0003643926862461147, 0.0003736146796859936, 0.000383069993083684, 0.0003927645259948656, 0.0004027043269660867, 0.0004128955972881834, 0.0004233446948437903, 0.0004340581380512485, 0.000445042609907315, 0.0004563049621311555, 0.000467852219412031, 0.000479691583763538, 0.0004918304389862987, 0.0005042763552430427, 0.0005170370937472855, 0.0005301206115700099, 0.0005435350665658361, 0.0005572888224226184, 0.0005713904538373246, 0.0005858487518206946, 0.0006006727291344077, 0.0006158716258646225, 0.0006314549151331181, 0.0006474323089523007, 0.0006638137642253193, 0.0006806094888955237, 0.0006978299482494824, 0.0007154858713771027, 0.0007335882577917394, 0.0007521483842151034, 0.0007711778115314419, 0.0007906883919141192, 0.0008106922761288689, 0.0008312019210186977, 0.0008522300971742197, 0.0008737898967948489, 0.0008958947417435339, 0.0009185583918025293, 0.0009417949531319927, 0.0009656188869391919, 0.000990045018361398, 0.001015088545567753, 0.001040765049087559, 0.001067090501366537, 0.001094081276560599, 
0.001121754160570298, 0.001150126361322267, 0.001179215519303665, 0.00120903971835595, 0.001239617496734227, 0.001270967858436623, 0.00130311028481384, 0.001336064746461277, 0.001369851715403768, 0.001404492177577595, 0.001440007645618509, 0.001476420171961311, 0.00151375236225972, 0.001552027389132949, 0.001591269006247733, 0.001631501562742196, 0.001672750018000805, 0.001715039956787756, 0.001758397604747809, 0.001802849844282223, 0.001848424230808482, 0.001895149009414035, 0.001943053131910809, 0.001992166274301563, 0.002042518854666302, 0.002094142051479009, 0.002147067822363944, 0.002201328923301687, 0.002256958928294004, 0.002313992249500962, 0.002372464157855669, 0.002432410804172502, 0.002493869240756369, 0.002556877443524958, 0.002621474334655706, 0.002687699805768677, 0.002755594741656685, 0.002825201044574677, 0.002896561659100818, 0.002969720597580887, 0.003044722966169306, 0.003121614991478093, 0.003200444047848493, 0.003281258685255616, 0.003364108657863128, 0.003449044953237156, 0.003536119822236374, 0.003625386809590236, 0.003716900785180116, 0.003810717976037234, 0.003906895999071986, 0.00400549389454816, 0.004106572160318699, 0.004210192786835912, 0.004316419292952279, 0.004425316762525906, 0.004536951881847682, 0.004651392977903358, 0.004768710057487979, 0.004888974847187002, 0.005012260834240572, 0.005138643308306343, 0.005268199404136626, 0.005401008145187241, 0.005537150488171198, 0.0056767093685761, 0.005819769747159999, 0.00596641865744123, 0.006116745254199498, 0.006270840863002724, 0.006428799030777055, 0.006590715577433747, 0.006756688648570934, 0.006926818769263489, 0.007101208898957659, 0.007279964487484932, 0.007463193532210022, 0.007651006636327142, 0.00784351706831939, 0.008040840822593747, 0.008243096681306716, 0.00845040627739116, 0.008662894158799173, 0.00888068785397055, 0.00910391793853862, 0.009332718103284465, 0.009567225223348446, 0.009807579428706817, 0.01005392417592387, 0.01030640632118341, 0.01056517619460736, 
0.01083038767586495, 0.0111021982710768, 0.01138076919101493, 0.01166626543059961, 0.01195885584969202, 0.01225871325518001, 0.01256601448435238, 0.0128809404895553, 0.01320367642412201, 0.01353441172956584, 0.01387334022402275, 0.01422066019192865, 0.01457657447491151, 0.01494129056388039, 0.0153150206922839, 0.01569798193051274, 0.01609039628141555, 0.01649249077689311, 0.01690449757553251, 0.01732665406124072, 0.01775920294282812, 0.01820239235449393, 0.01865647595715554, 0.01912171304056069, 0.01959836862611696, 0.02008671357036573, 0.02058702466902149, 0.02109958476149335, 0.02162468283579591, 0.02216261413375082, 0.02271368025637423, 0.0232781892693345, 0.0238564558083584, 0.02444880118445525, 0.02505555348881644, 0.02567704769724336, 0.02631362577393984, 0.02696563677450055, 0.02763343694791108, 0.0283173898373668, 0.02901786637970193, 0.02973524500321021, 0.03046991172362329, 0.03122226023799688, 0.0319926920162446, 0.03278161639003745, 0.03358945063877466, 0.03441662007231303, 0.03526355811012214, 0.0361307063565174, 0.03701851467159845, 0.03792744123750379, 0.03885795261956819, 0.03981052382194739, 0.04078563833725275, 0.04178378818970918, 0.04280547397133097, 0.04385120487057592, 0.04492149869291587, 0.04601688187272934, 0.04713788947589343, 0.04828506519242013, 0.04945896131844965, 0.05066013872687794, 0.05188916682586385, 0.05314662350442133, 0.05443309506426425, 0.055749176137037, 0.05709546958601452, 0.05847258639132406, 0.05988114551768726, 0.06132177376364384, 0.06279510559116844, 0.06430178293454303, 0.0658424549873033, 0.06741777796601962, 0.06902841484962895, 0.07067503509297277, 0.07235831431314753, 0.07407893394721472, 0.0758375808797612, 0.07763494703874077, 0.07947172895796946, 0.08134862730458499, 0.08326634636971837, 0.0852255935205649, 0.08722707861197379, 0.08927151335561699, 0.09135961064472531, 0.09349208383232244, 0.0956696459608144, 0.09789300894073268, 0.100162882676359, 0.1024799741358954, 0.1048449863637825, 0.1072586174327014, 
0.1097215593327341, 0.1122344967950985, 0.1147981060478126, 0.1174130535005911, 0.1200799943562218, 0.1227995711456209, 0.1255724121837237, 0.1283991299433208, 0.1312803193439223, 0.1342165559526983, 0.1372083940945232, 0.1402563648681363, 0.1433609740654227, 0.1465226999908271, 0.1497419911779173, 0.1530192640001459, 0.156354900172891, 0.159749244143907, 0.1632026003693838, 0.1667152304728906, 0.1702873502845799, 0.1739191267581446, 0.1776106747631561, 0.1813620537505761, 0.1851732642894092, 0.1890442444726822, 0.1929748661911614, 0.1969649312734887, 0.2010141674917159, 0.2051222244315364, 0.2092886692268866, 0.2135129821589825, 0.2177945521203005, 0.2221326719444952, 0.2265265336037645, 0.230975223275754, 0.2354777162827029, 0.2400328719062051, 0.244639428081689, 0.2492959959774825, 0.2540010544641768, 0.2587529444808832, 0.2635498633059344, 0.2683898587405972, 0.273270823215433, 0.2781904878300925, 0.2831464163385307, 0.2881359990928996, 0.2931564469607209, 0.2982047852313345, 0.3032778475290901, 0.3083722697522823, 0.3134844840584088, 0.3186107129179986, 0.323746963260943, 0.3288890207410226, 0.3340324441461286, 0.3391725599834958, 0.3443044572711477, 0.3494229825686331, 0.354522735282032, 0.3595980632801167, 0.3646430588604196, 0.3696515551058502, 0.3746171226742748, 0.3795330670652534, 0.384392426409792, 0.3891879698305064, 0.3939121964210643, 0.3985573348950179, 0.4031153439552516, 0.4075779134361639, 0.4119364662713099, 0.4161821613396339, 0.4203058972434378, 0.4242983170709466, 0.428149814195652, 0.4318505391634728, 0.4353904077172265, 0.4387591100057827, 0.4419461210226598, 0.4449407123156026, 0.4477319650048198, 0.4503087841430963, 0.4526599144457765, 0.4547739574127402, 0.4566393898578547, 0.458244583853995, 0.4595778280936128, 0.4606273506559331, 0.4613813431622625, 0.4618279862905827, 0.461955476609645, 0.4617520546812481, 0.4612060343673452, 0.4603058332662165, 0.4590400041892778, 0.4573972675773549, 0.4553665447426352, 0.4529369918102254, 
0.4500980342215873, 0.4468394016513746, 0.4431511631797353, 0.4390237625542865, 0.4344480533702607, 0.429415333994096, 0.4239173820556222, 0.4179464883374903, 0.4114954898981456, 0.4045578022771926, 0.397127450649946, 0.3891990998220623, 0.380768082986073, 0.3718304291999313, 0.362382889594158, 0.3524229623692272, 0.3419489167090911, 0.3309598158106614, 0.3194555393127395, 0.3074368055017335, 0.2949051937751569, 0.281863167957188, 0.2683141011829097, 0.2542623031979944, 0.2397130510575375, 0.2246726243492202, 0.2091483462098397, 0.1931486315474763, 0.176683044020248, 0.1597623634527754, 0.1423986654875371, 0.1246056385229154, 0.1063995153040935, 0.08779877635425645, 0.06882405572798758, 0.04949822829587915, 0.02984649449951327, 0.009896461871685618, -0.0103217774423702, -0.03077557398840189, -0.05142965402029309, -0.07224605918523722, -0.09318409389778846, -0.1142002814911737, -0.1352483303022205, -0.1562791109140894, -0.1772406458480169, -0.1980781130607093, -0.2187338646670081, -0.2391474623669823, -0.2592557311116054, -0.2789928325903995, -0.2982903601665371, -0.3170774569183736, -0.3352809584696013, -0.3528255623013902, -0.3696340252370661, -0.3856273907709799, -0.400725247876021, -0.4148460228663026, -0.4279073058104098, -0.4398262128835481, -0.4505197859112192, -0.4599054301897767, -0.4679013914674403, -0.4744272727300562, -0.4794045911560771, -0.4827573752819081, -0.4844128020490203, -0.4843018729853075, -0.482360128302494, -0.4785283971667213, -0.4727535818188531, -0.4649894725831177, -0.4551975901066149, -0.4433480504178334, -0.4294204475803621, -0.4134047478501587, -0.3953021883239038, -0.3751261720963316, -0.3529031509316387, -0.3286734854055278, -0.3024922713993733, -0.2744301207377338, -0.2445738826685766, -0.2130272918081628, -0.1799115271282742, -0.14536566557382, -0.1095470129882907, -0.07263129422048935, -0.03481268361906553, 0.003696343374692608, 0.04266535711380943, 0.0818465387314527, 0.120975482596174, 0.1597722550239485, 0.197942727070024, 
0.2351801978299515, 0.2711673228077153, 0.3055783595114282, 0.3380817394669869, 0.3683429722536052, 0.3960278829229651, 0.4208061792327512, 0.4423553394808251, 0.4603648053540388, 0.4745404571045492, 0.484609340551286, 0.4903246069083814, 0.4914706173212888, 0.4878681543253706, 0.4793796723395892, 0.4659145089096662, 0.4474339678951714, 0.4239561753650959, 0.3955605988774914, 0.3623921113626765, 0.3246644723399092, 0.2826630920493156, 0.2367469386889092, 0.1873494457617768, 0.134978276043165, 0.08021380137716269, 0.02370616392758294, -0.03382920484192568, -0.0916177157919324, -0.1488334920645406, -0.2046090388711463, -0.2580467424160458, -0.3082321888895327, -0.3542492498368048, -0.3951968294302978, -0.4302071125639673, -0.4584650908247196, -0.4792290771469557, -0.4918518505178026, -0.4958020010310605, -0.4906849748284285, -0.4762632503616966, -0.4524750147074661, -0.4194506545073621, -0.3775263339921943, -0.3272539062824321, -0.2694063977642589, -0.2049783229503163, -0.1351801329336137, -0.06142617818990675, 0.01468432051477682, 0.0913936483010772, 0.1668194838242039, 0.2389962318487994, 0.3059234644260829, 0.3656208289906668, 0.4161884463556769, 0.4558714644780451, 0.4831270660520506, 0.496691863517054, 0.4956472709842796, 0.4794801387965256, 0.4481356953295227, 0.4020596865245475, 0.3422265618499569, 0.2701506511470656, 0.1878775337598364, 0.09795323975981225, 0.003369557842383798, -0.09251543718097334, -0.1860814852396603, -0.2735734934517818, -0.35124938293654, -0.4155441827456777, -0.4632445878789431, -0.4916666983808717, -0.4988282908013138, -0.4836058546301746, -0.4458658775888225, -0.3865596116760081, -0.3077709190548494, -0.2127078910262316, -0.1056308356237104, 0.00828801893203305, 0.1231741688661095, 0.2327265445624194, 0.3305510539946817, 0.4105384207200283, 0.4672674392226495, 0.4964098130911053, 0.4951086433123969, 0.4622999154849149, 0.3989455224640179, 0.308147937612426, 0.1951210091844806, 0.06699870122404784, -0.06752604525555932, 
-0.1987271107714711, -0.3165153244206929, -0.4112164076594336, -0.4744043217076449, -0.499723635220171, -0.4836243424736711, -0.4259280585083366, -0.3301474340369417, -0.2034922971409078, -0.05651708685265205, 0.09760570560334157, 0.2441635991839944, 0.3682685775297774, 0.4563994224300647, 0.4979815404565464, 0.4868285300764764, 0.4222002309605242, 0.3093437913750844, 0.1594775626829866, -0.01105410607993743, -0.1823367010718788, -0.3330923483947517, -0.4433242736834649, -0.4970971296453459, -0.4850789298070443, -0.4064418318290014, -0.2697473572146973, -0.09253531780611382, 0.1005062912925828, 0.2807033631162947, 0.4195660083192265, 0.4934162208756996, 0.4877478330779747, 0.4005172368834696, 0.2436212404727749, 0.04202980209657466, -0.1696012323159912, -0.3525488084170822, -0.4710874661797677, -0.4997836173363122, -0.4295457742532802, -0.2709718518828135, -0.05387914153773801, 0.1774287731107132, 0.3726973166279175, 0.4866209005416548, 0.4898517741811003, 0.3774477001661045, 0.1722786714997939, -0.07820570454163168, -0.3119582505118796, -0.4673598866450677, -0.4998861488348805, -0.3959592316309146, -0.1795853799360581, 0.09122387744018579, 0.3386393017631028, 0.486947442034525, 0.4863897373799671, 0.3314349570838935, 0.06659705467084398, -0.2239879611183269, -0.4419381619344689, -0.5080442272222534, -0.3926513279561344, -0.131159638842465, 0.183479232716879, 0.4316620671127718, 0.5123724753884775, 0.3861536541988457, 0.09729107547444414, -0.237458709578817, -0.4734910295310395, -0.5009173966774688, -0.2985386345605389, 0.04782690262200534, 0.3769078745068928, 0.525016329791719, 0.4092067917408874, 0.07829114940804717, -0.3006350070741661, -0.5224280926617738, -0.4555575499153309, -0.1251606224129766, 0.2862965701540851, 0.5338482763797501, 0.455903304310532, 0.08570901789721774, -0.3501961990449067, -0.5634611836205874, -0.3954611181937175, 0.0568650962532125, 0.4829370186014508, 0.5648944257950091, 0.2186592188799192, -0.3129889116244797, -0.6157204091085092, 
-0.4233531654364162, 0.1385439196062137, 0.6102956683860189, 0.5623077018284641, -0.007808711727774371, -0.6109355724930764, -0.6643219504103295, -0.05490436516566257, 0.668007969629802, 0.7540209370107244, 0.02371220445150303, -0.8235268321362492, -0.8292515036235435, 0.1561617381586917, 1.110833706037405, 0.8279526897873001, -0.5926575264851783, -1.523063848747459, -0.5639088374846358, 1.44610261788616, 1.89916964697453, -0.3734665812350208, -2.815488569428936, -1.642767538027539, 2.663876010664339, 4.270177181722614, -0.7636185842576751, -6.813431783988215, -3.581181098415261, 7.98922117585164, 10.96064691367101, -5.498417048526511, -21.43579284760681, -4.658421697895847, 33.40026659724299, 29.08607194153782, -40.85667118415634, -77.68440336538364, 27.35150663714605, 162.3555910904419, 46.67334239222285, -288.0539951206264, -268.0786250286146, 419.9381067501886, 810.6015043438076, -383.916378006141 + ], + "label": "2P", + "angular_momentum": 1 + } + ], + "ae_local_potential": [ + -9869.698425778925, -9747.09506286803, -9626.014703391345, -9506.43842829636, -9388.347553546804, -9271.723627203235, -9156.5484265399, -9042.80395519736, -8930.47244037056, -8819.53633003176, -8709.978290188035, -8601.781202172735, -8494.92815997076, -8389.402467576845, -8285.18763638687, -8182.267382621445, -8080.6256247815, -7980.24648113561, -7881.114267238345, -7783.213493479595, -7686.528862664275, -7591.04526762212, -7496.74778884715, -7403.621692166445, -7311.652426437935, -7220.825621276735, -7131.12708480976, -7042.54280145818, -6955.058929747505, -6868.66180014478, -6783.33791292275, -6699.073936050425, -6615.85670310999, -6533.673211239495, -6452.510619101125, -6372.356244874725, -6293.197564276255, -6215.02220860083, -6137.817962790065, -6061.57276352349, -5986.27469733358, -5911.91199874431, -5838.47304843273, -5765.946371413445, -5694.320635245605, -5623.58464826222, -5553.727357821415, -5484.73784857941, -5416.605340785055, -5349.31918859536, -5282.86887841217, 
-5217.244027239325, -5152.434381060305, -5088.429813236055, -5025.220322922605, -4962.796033508517, -4901.147191071563, -4840.26416285468, -4780.137435760853, -4720.75761486663, -4662.115421954194, -4604.201694061592, -4547.007382051021, -4490.523549194862, -4434.741369779315, -4379.6521277253505, -4325.247215226823, -4271.51813140547, -4218.456480982626, -4166.053972967463, -4114.302419361485, -4063.19373387915, -4012.719930684375, -3962.8731231427237, -3913.6455225890977, -3865.0294371107534, -3817.0172703454305, -3769.6015202943904, -3722.774778150219, -3676.5297271391814, -3630.859141377977, -3585.755884744661, -3541.212909763622, -3497.2232565043987, -3453.78005149416, -3410.876506643747, -3368.5059181869897, -3326.6616656332303, -3285.337210732884, -3244.5260964557906, -3204.2219459823177, -3164.4184617069477, -3125.109424254289, -3086.2886915072645, -3047.950197647396, -3010.08795220702, -2972.6960391332536, -2935.768615863606, -2899.2999124130656, -2863.2842304725164, -2827.715942518382, -2792.5894909333065, -2757.8993871377643, -2723.6402107324625, -2689.8066086513804, -2656.393294325361, -2623.3950468560597, -2590.806710200177, -2558.6231923638115, -2526.839464606816, -2495.4505606570674, -2464.4515759344613, -2433.837666784567, -2403.6040497217973, -2373.746000681975, -2344.258854284188, -2315.138003101822, -2286.378896942633, -2257.977042137771, -2229.9280008396395, -2202.2273903284727, -2174.870882327514, -2147.8542023267405, -2121.1731289149266, -2094.823493120074, -2068.8011777579795, -2043.102116788923, -2017.7222946823474, -1992.6577457894116, -1967.9045537233526, -1943.458850747555, -1919.3168171711884, -1895.4746807523934, -1871.9287161088484, -1848.675244135666, -1825.710631430548, -1803.031289726035, -1780.633675328842, -1758.5142885661555, -1736.669673238787, -1715.0964160811504, -1693.7911462279244, -1672.7505346873486, -1651.9712938210614, -1631.450176830402, -1611.1839772490875, -1591.169528442206, -1571.403703111408, -1551.8834128062765, 
-1532.605607441732, -1513.5672748214665, -1494.765440167274, -1476.1971656542385, -1457.8595499516896, -1439.7497277698644, -1421.864869412203, -1404.2021803332025, -1386.7589007017534, -1369.53230496993, -1352.5197014470996, -1335.7184318793495, -1319.125871034126, -1302.7394262900395, -1286.556537231763, -1270.574675249952, -1254.791343146162, -1239.2040747426365, -1223.810434496975, -1208.6080171215724, -1193.5944472077815, -1178.7673788547625, -1164.1244953029145, -1149.663508571894, -1135.3821591031035, -1121.278215406629, -1107.3494737125784, -1093.5937576267204, -1080.008917790431, -1066.5928315448484, -1053.3434025991994, -1040.2585607032574, -1027.336261323853, -1014.5744853254186, -1001.9712386544905, -989.524552028132, -977.23248062624, -965.0931037876455, -953.10452471002, -941.264870153494, -929.572290147952, -918.0249577039815, -906.6210685273945, -895.3588407373055, -884.23651458771, -873.252352192516, -862.4046372540055, -851.6916747946455, -841.1117908922595, -830.6633324184605, -820.344666780355, -810.154181665445, -800.0902847896965, -790.1514036487486, -780.3359852722025, -770.642495980965, -761.069421147616, -751.6152649597345, -742.278550186181, -733.0578179462785, -723.9516274818515, -714.9585559321165, -706.077198111345, -697.30616628931, -688.644089974443, -680.0896156996974, -671.6414068110635, -663.298143258715, -655.058521390746, -646.9212537494775, -638.885068870282, -630.948711082927, -623.110940315359, -615.370531899949, -607.726276382136, -600.17697933144, -592.721461154841, -585.3585569124535, -578.0871161355095, -570.9060026465955, -563.814094382117, -556.8102832169805, -549.893474791441, -543.062588340109, -536.3165565230815, -529.654325259158, -523.0748535611535, -516.577113373227, -510.1600894102545, -503.822778999187, -497.56419192237735, -491.3833502628646, -485.2792882515617, -479.25105211636173, -473.2976999331047, -467.4183014783985, -461.611938084274, -455.87770249463625, -450.2146987235096, -444.62204191503196, 
-439.09885820519935, -433.6442845853206, -428.2574687671691, -422.93756904981444, -417.68375418810155, -412.49520326277013, -407.3711055521819, -402.31066040564366, -397.3130771183068, -392.3775748076141, -387.5033822912886, -382.68973796683287, -377.93588969252795, -373.2410946699089, -368.60461932770204, -364.0257392072018, -359.50373884907384, -355.0379116815623, -350.62755991008595, -346.2719944082081, -341.97053460995727, -337.7225084034885, -333.5272520260644, -329.3841099603409, -325.29243483194057, -321.25158730830043, -317.26093599877356, -313.31985735597374, -309.4277355783448, -305.5839625139406, -301.78793756539926, -298.03906759609987, -294.33676683748195, -290.6804567975204, -287.0695661703321, -283.50353074691037, -279.98179332696526, -276.503803631859, -273.0690182186267, -269.67690039505936, -266.32692013584483, -263.01855399975307, -259.7512850478439, -256.52460276269574, -253.3380029686379, -250.19098775296845, -247.0830653881578, -244.01375025501304, -240.98256276680016, -237.98902929430696, -235.0326820918382, -232.11305922412876, -229.22970449416516, -226.38216737190496, -223.5700029238793, -220.7927717436718, -218.0500398832604, -215.34137878521136, -212.66636521571735, -210.0245811984663, -207.4156139493301, -204.83905581186846, -202.2945041936303, -199.78156150324835, -197.29983508831444, -194.8489371740273, -192.4284848026013, -190.03809977342885, -187.677408583986, -185.3460423714716, -183.0436368551714, -180.7698322795396, -178.524273357985, -176.30660921735745, -174.116493343123, -171.9535835252207, -169.81754180459075, -167.7080344203686, -165.6247317577337, -163.5673082964062, -161.53544255978494, -159.5288170647142, -157.5471182718781, -155.5900365368083, -153.657266061502, -151.74850484664046, -149.8634546444, -148.0018209118515, -146.1633127649367, -144.34764293301726, -142.55452771398825, -140.7836869299477, -139.0348438834209, -137.3077253141236, -135.6020613562657, -133.91758549638334, -132.2540345316964, -130.611148528982, 
-128.9886707839599, -127.38634778118124, -125.8039291544172, -124.2411676475375, -122.6978190758774, -121.17364228808275, -119.66839912842914, -118.18185439961086, -116.7137758259892, -115.26393401730034, -113.83210243281205, -112.41805734592594, -111.02157780922055, -109.6424456199268, -108.2804452858344, -106.9353639916201, -105.6069915655951, -104.29512044686525, -102.999545652899, -101.7200647474991, -100.4564778091709, -99.20858739988455, -97.97619853422475, -96.75911864892365, -95.5571575727732, -94.3701274969098, -93.1978429454691, -92.04012074660486, -90.896780003868, -89.7676420679414, -88.65253050872525, -87.55127108776975, -86.4636917310498, -85.38962250207825, -84.32889557535265, -83.28134521013286, -82.2468077245429, -81.225121469996, -80.21612680593606, -79.219666074894, -78.23558357785335, -77.2637255499217, -76.3039401363051, -75.3560773685798, -74.4199891412599, -73.4955291886554, -72.5825530620178, -71.68091810696986, -70.7904834412155, -69.91110993252666, -69.04266017700326, -68.18499847760415, -67.33799082294405, -66.50150486635356, -65.67540990519996, -64.85957686046505, -64.0538782565755, -63.2581882014855, -62.47238236700495, -61.69633796937345, -60.92993375007505, -60.1730499568911, -59.42556832518905, -58.68737205944325, -57.95834581498525, -57.238375679981, -56.52734915763215, -55.82515514859805, -55.1316839336361, -54.44682715645805, -53.7704778067991, -53.10253020369735, -52.44287997898075, -51.7914240609598, -51.14806065832165, -50.51268924422585, -49.88521054059614, -49.265526502608544, -48.65354030337135, -48.049156318795774, -47.45228011265454, -46.86281842182617, -46.28067914172214, -45.7057713118956, -45.13800510182833, -44.57729179689491, -44.02354378450079, -43.4766745403922, -42.93659861513733, -42.403231620773866, -41.876490217623875, -41.35629210127146, -40.84255598970246, -40.335201610604386, -39.834149688823175, -39.33932193397661, -38.850641028221304, -38.368030614171474, -37.891415282968104, -37.4207205624959, 
-36.95587290574717, -36.49679967932984, -36.04342915211823, -35.59569048404515, -35.15351371503264, -34.71682975406103, -34.285570368372916, -33.85966817281191, -33.43905661929346, -33.02366998640654, -32.61344336914463, -32.208312668764236, -31.80821458276912, -31.413086595019404, -31.02286696596303, -30.637494722989086, -30.25690965090057, -29.88105228250564, -29.509863889325874, -29.14328647241955, -28.78126275331951, -28.423736165083135, -28.070650843453606, -27.721951618131136, -27.377584004152176, -27.037494193376606, -26.70162904607948, -26.369936082648305, -26.042363475382643, -25.718860040396226, -25.39937522961912, -25.083859122899725, -24.77226242020447, -24.464536433914606, -24.16063308121869, -23.860504876599617, -23.56410492441484, -23.2713869115688, -22.9823051002765, -22.69681432091677, -22.41486996497459, -22.13642797807069, -21.861444853078204, -21.589877623324426, -21.321683855877176, -21.056821644914724, -20.79524960517781, -20.53692686550302, -20.28181306243681, -20.029868333928444, -19.78105331310152, -19.5353291221029, -19.292657366027846, -19.053000126920804, -18.8163199578506, -18.582579877059324, -18.351743362183885, -18.123774344549243, -17.89863720353264, -17.67629676099776, -17.45671827579818, -17.2398674383489, -17.02571036526544, -16.814213594069503, -16.6053440779603, -16.39906918065107, -16.19535667126949, -15.994174719321565, -15.795491889718084, -15.599277137862694, -15.4054998048013, -15.214129612431416, -15.025136658771196, -14.83849141328719, -14.654164712280066, -14.47212775432785, -14.29235209578556, -14.114809646340815, -13.93947266462476, -13.766313753877284, -13.59530585766632, -13.42642225566023, -13.259636559452645, -13.0949227084392, -12.932254965745615, -12.77160791420615, -12.61295645239221, -12.456275790690075, -12.30154144742766, -12.148729245048965, -11.997815306336495, -11.84877605068029, -11.70158819039341, -11.55622872707322, -11.4126749480078, -11.27090442262709, -11.13089499899801, -10.99262480036323, 
-10.85607222172286, -10.72121592645858, -10.58803484299982, -10.456508161531255, -10.326615330741214, -10.19833605461053, -10.071650289241195, -9.946538239724514, -9.82298035704806, -9.70095733504116, -9.58045010735821, -9.46143984449953, -9.34390795086926, -9.2278360618697, -9.113206041031845, -8.999999977181515, -8.888200181640675, -8.7777891854636, -8.668749736707236, -8.56106479773562, -8.454717542557695, -8.34969135419816, -8.24596982210113, -8.14353673956589, -8.04237610121457, -7.942472100491305, -7.843809127192375, -7.74637176502713, -7.65014478920915, -7.55511316407733, -7.46126204074655, -7.368576754787455, -7.27704282393519, -7.18664594582645, -7.097371995764745, -7.009207024513375, -6.922137256115835, -6.83614908574329, -6.75122907756881, -6.667363962667995, -6.584540636945665, -6.50274615908832, -6.42196774854207, -6.342192783515595, -6.263408799008015, -6.185603484861205, -6.10876468383625, -6.03288038971395, -5.957938745418755, -5.883928041166105, -5.81083671263275, -5.738653339149785, -5.66736664191818, -5.59696548224645, -5.527438859810165, -5.458775910933195, -5.390965906890185, -5.323998252230225, -5.257862483121235, -5.19254826571502, -5.128045394532575, -5.06434379086943, -5.001433501220885, -4.9393046957267135, -4.877947666635256, -4.817352826786552, -4.757510708114328, -4.698411960166597, -4.640047348644638, -4.582407753960109, -4.525484169810097, -4.4692677017698585, -4.413749565903058, -4.358921087389252, -4.304773699168423, -4.251298940602375, -4.198488456152708, -4.146333994075279, -4.094827405159761, -4.043960641330749, -3.9937257546033047, -3.9441148956654075, -3.895120312720975, -3.846734350268475, -3.798949447904583, -3.7517581391428574, -3.7051530502470484, -3.6591268990789687, -3.6136724939605775, -3.568782732550434, -3.5244506007334273, -3.480669171596741, -3.437431604159537, -3.394731142439839, -3.3525611144069734, -3.310914930911534, -3.269786084655792, -3.229168149176946, -3.1890547778429355, -3.149439702860783, 
-3.11031673429725, -3.071679759111602, -3.033522740200473, -2.995839715454516, -2.958624796826818, -2.9218721694128953, -2.8855760905420444, -2.849730888880083, -2.8143309635431475, -2.7793707832225363, -2.7448448853204477, -2.7107478750963727, -2.677074424824194, -2.643819272959657, -2.610977223318242, -2.5785431442632407, -2.546511967903859, -2.514878689303379, -2.483638365697046, -2.4527861157197437, -2.4223171186432415, -2.3922266136228814, -2.3625098989536797, -2.333162331335603, -2.3041793251480156, -2.2755563517331323, -2.2472889386883397, -2.219372669167366, -2.1918031811900804, -2.1645761669609027, -2.137687372195707, -2.1111325954570326, -2.084907687497658, -2.059008550612255, -2.0334311379971837, -2.0081714531182464, -1.9832255490862796, -1.958589528040601, -1.934259540540067, -1.9102317849617605, -1.886502506907169, -1.8630679986157226, -1.839924598385672, -1.817068690002116, -1.7944967021721705, -1.7722051079671306, -1.7501904242715225, -1.728449211239017, -1.706978071755023, -1.6857736509059635, -1.6648326354550984, -1.6441517533248016, -1.6237277730852786, -1.6035575034495595, -1.583637792774774, -1.563965528569592, -1.54453763700775, -1.525351082447638, -1.5064028669578264, -1.4876900298484925, -1.469209647208687, -1.4509588314493165, -1.43293473085186, -1.415134529122652, -1.3975554449527474, -1.380194731583259, -1.3630496763760864, -1.346117600390014, -1.32939585796206, -1.3128818362940524, -1.296572955044349, -1.2804666659246136, -1.2645604523016425, -1.2488518288041075, -1.2333383409342085, -1.218017564684159, -1.202887106157411, -1.187944601194621, -1.1731877150042385, -1.158614141797694, -1.14422160442913, -1.1300078540395835, -1.115970669705618, -1.102107858092294, -1.0884172531104654, -1.074896715578342, -1.0615441328872266, -1.0483574186714375, -1.0353345124823075, -1.022473379466244, -1.009772010046792, -0.9972284196106285, -0.984840648197486, -0.9726067601938965, -0.9605248440307645, -0.948593011884691, -0.936809399382996, 
-0.9251721653124265, -0.913679491331462, -0.902329581686208, -0.8911206629298205, -0.8800509836454035, -0.8691188141723645, -0.8583224463361525, -0.847660193181373, -0.8371303887082076, -0.826731387612102, -0.8164615650267075, -0.806319316269996, -0.7963030565935385, -0.7864112209349055, -0.7766422636731215, -0.766994658387188, -0.757466897617574, -0.748057492630694, -0.738764973186305, -0.72958788730778, -0.720524801055259, -0.7115742983015915, -0.7027349805110845, -0.694005466520986, -0.685384392325682, -0.6768704108635825, -0.668462191806643, -0.6601584213525135, -0.6519578020192505, -0.6438590524425954, -0.635860907175763, -0.627962116491711, -0.620161446187878, -0.612457677393327, -0.6048496063783025, -0.597336044366141, -0.589915817347516, -0.582587765896998, -0.575350744991878, -0.568203623833271, -0.5611452856693675, -0.554174627620974, -0.547290560509164, -0.540492008685075, -0.533777909861822, -0.5271472149485035, -0.5205988878862575, -0.514131905486365, -0.507745257270351, -0.5014379453120835, -0.49520898408182573, -0.48905740029222566, -0.48298223274622376, -0.4769825321868412, -0.47105736114884145, -0.46520579381223504, -0.4594269158575961, -0.45371982432318925, -0.4480836274638597, -0.4425174446116865, -0.43702040603836567, -0.43159165281929895, -0.4262303366993828, -0.42093561996045387, -0.41570667529039346, -0.4105426856538562, -0.40544284416460274, -0.40040635395942925, -0.39543242807365453, -0.3905202893181667, -0.3856691701579932, -0.38087831259238025, -0.37614696803637276, -0.3714743972038568, -0.36685986999206416, -0.36230266536751426, -0.35780207125336827, -0.35335738441819325, -0.3489679103661016, -0.34463296322826176, -0.3403518656557593, -0.3361239487137825, -0.33194855177713306, -0.32782502242702743, -0.32375271634918423, -0.31973099723318, -0.31575923667304845, -0.3118368140691225, -0.3079631165310868, -0.30413753878224, -0.30035948306494376, -0.2966283590472411, -0.2929435837306409, -0.2893045813590368, -0.2857107833287636, 
-0.2821616280997663, -0.2786565611078668, -0.27519503467812506, -0.2717765079392669, -0.2684004467391774, -0.2650663235614404, -0.2617736174429087, -0.2585218138923023, -0.25531040480980777, -0.25213888840767856, -0.24900676913182035, -0.24591355758434216, -0.24285877044707416, -0.23984193040602786, -0.2368625660767961, -0.23392021193087856, -0.2310144082229158, -0.2281447009188333, -0.2253106416248695, -0.22251178751748946, -0.2197477012741674, -0.2170179510050258, -0.2143221101853278, -0.21165975758880334, -0.2090304772218065, -0.2064338582582916, -0.20386949497559315, -0.2013369866910099, -0.19883593769917204, -0.19636595721019004, -0.1939266592885738, -0.1915176627929062, -0.18913859131627436, -0.18678907312743506, -0.184468741112716, -0.1821772327186394, -0.179914189895256, -0.1776792590401881, -0.1754720909433639, -0.1732923407324409, -0.1711396678189081, -0.16901373584485266, -0.16691421263039274, -0.16484077012175585, -0.16279308434006726, -0.1607708353299876, -0.15877370711468666, -0.1568013876281592, -0.154853583120753, -0.1529299599682956, -0.15103023926394754, -0.1491540790420025, -0.14730125731497545, -0.1454714517090932, -0.14366437631350634, -0.14187974876900786, -0.1401172902239123, -0.1383767252904864, -0.13665778200191764, -0.13496019176981966, -0.13328368934226506, -0.13162801276233824, -0.1299929033272054, -0.1283781055476904, -0.1267833671083545, -0.1252084388280724, -0.12365307462109575, -0.12211703145860316, -0.12060006933072515, -0.1191019512090426, -0.11762244300955164, -0.11616131355608555, -0.11471833454419485, -0.11329328050547305, -0.11188592877232705, -0.11049605944318565, -0.1091234553481381, -0.1077679020150024, -0.10642918763581205, -0.10510710303372155, -0.10380144163032234, -0.10251199941336345, -0.1012385749048754, -0.09998096912968775, -0.0987389855843394, -0.09751243020637484, -0.09630111134402045, -0.09510483972623995, -0.09392342843315915, -0.0927566928668608, -0.09160445072253895, -0.09046652196001374, -0.0893427287756009, 
-0.0882328955743288, -0.08713684894250145, -0.086054417620603, -0.0849854324765367, -0.08392972647919936, -0.0828871346723808, -0.0818574941489898, -0.08084064402560015, -0.0798364254173109, -0.0788446814129218, -0.07786525705041415, -0.0768979992927386, -0.0759427570039029, -0.07499938092535564, -0.07406772365266555, -0.07314763961248814, -0.07223898503982006, -0.07134161795553595, -0.0704553981442029, -0.06958018713217265, -0.0687158481659438, -0.0678622461907942, -0.0670192478296789, -0.06618672136238855, -0.0653645367049691, -0.064552565389395, -0.06375068054349615, -0.06295875687113404, -0.0621766706326235, -0.0614042996253989, -0.06064152316491875, -0.0598882220658092, -0.059144278623241, -0.0584095765945376, -0.05768400118101235, -0.0569674390100307, -0.0562597781172954, -0.0555609079293524, -0.05487071924631285, -0.05418910422479095, -0.0535159563610528, -0.05285117047437505, -0.0521946426906105, -0.0515462704259571, -0.05090595237092955, -0.05027358847452895, -0.049649079928609785, -0.0490323291524414, -0.04842323977746004, -0.04782171663221184, -0.04722766572748149, -0.04664099424160638, -0.04606161050597344, -0.04548942399069507, -0.04492434529046427, -0.04436628611058439, -0.043815159253173135, -0.04327087860353789, -0.04273335911671967, -0.0422025168042052, -0.04167826872080306, -0.04116053295168359, -0.04064922859957963, -0.0401442757721458, -0.03964559556947558, -0.0391531100717727, -0.03866674232717608, -0.038186416339736254, -0.037712057057540306, -0.037243590360985436, -0.03678094305119715, -0.036324042838591954, -0.03587281833158219, -0.035427199025420485, -0.034987115291183764, -0.034552498364893144, -0.03412328033676957, -0.03369939414062303, -0.033280773543372846, -0.03286735313469914, -0.03245906831682188, -0.03205585529440755, -0.03165765106460118, -0.031264393407181575, -0.030876020874839805, -0.030492472783577484, -0.030113689203224953 + ], + "aug_multipoles": [ + 0.3960392600307058, -0.008987255564312612, 0.0, 0.0, -0.008987255564312612, 
-0.0237558927567202, 0.0, 0.0, 0.0, 0.0, 0.01082879407556624, 0.01916025224445008, 0.0, 0.0, 0.01916025224445008, 0.03237927822587573, 0.0, 0.0, 0.02244681499868375, 0.04153788156905582, 0.0, 0.0, -0.003173598878361797, -0.005785938142916984, 0.02244681499868375, -0.003173598878361797, 0.0, 0.0, 0.04153788156905582, -0.005785938142916984, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.008718933215632494, 0.01635115791731505, 0.0, 0.0, 0.01635115791731505, 0.02737960698173533 + ] + }, + "header": { + "pseudo_type": "PAW", + "number_of_proj": 4, + "cutoff_radius_index": 729, + "z_valence": 3.0, + "mesh_size": 1017, + "element": "Li", + "number_of_wfc": 2, + "paw_core_energy": -2.687095342679413e-06, + "core_correction": false + }, + "augmentation": [ + { + "i": 0, + "radial_function": [ + 1.972272946279819e-06, 2.022201237352647e-06, 2.073393467271995e-06, 2.125881632706754e-06, 2.179698540320463e-06, 2.234877827276158e-06, 2.291453982260308e-06, 2.349462367038961e-06, 2.408939238559554e-06, 2.469921771612261e-06, 2.532448082064935e-06, 2.596557250686287e-06, 2.662289347572102e-06, 2.7296854571898e-06, 2.798787704057002e-06, 2.869639279070081e-06, 2.942284466499256e-06, 3.016768671667018e-06, 3.093138449327222e-06, 3.171441532762596e-06, 3.251726863618778e-06, 3.334044622493629e-06, 3.418446260300873e-06, 3.504984530427671e-06, 3.593713521706268e-06, 3.684688692220227e-06, 3.777966903966502e-06, 3.873606458394916e-06, 3.971667132847286e-06, 4.072210217918996e-06, 4.175298555766286e-06, 4.280996579383303e-06, 4.389370352873389e-06, 4.500487612739772e-06, 4.614417810221537e-06, 4.731232154701214e-06, 4.851003658211215e-06, 4.973807181066911e-06, 5.099719478654806e-06, 5.228819249405161e-06, 5.361187183978891e-06, 5.496906015699632e-06, 5.636060572262395e-06, 5.778737828751148e-06, 5.92502696199851e-06, 6.075019406321356e-06, 6.228808910667404e-06, 6.386491597208319e-06, 6.548166021416006e-06, 6.713933233659662e-06, 6.883896842361972e-06, 
7.058163078754098e-06, 7.236840863269724e-06, 7.420041873619776e-06, 7.607880614590397e-06, 7.800474489607537e-06, 7.997943874113222e-06, 8.200412190799118e-06, 8.408005986744452e-06, 8.620855012506592e-06, 8.83909230321348e-06, 9.062854261708856e-06, 9.292280743802055e-06, 9.527515145675665e-06, 9.768704493505787e-06, 1.001599953535063e-05, 1.026955483536516e-05, 1.052952887040051e-05, 1.079608412904842e-05, 1.106938721319293e-05, 1.134960894213225e-05, 1.163692445933641e-05, 1.193151334190703e-05, 1.223355971280769e-05, 1.254325235593515e-05, 1.286078483410307e-05, 1.318635561001216e-05, 1.352016817028225e-05, 1.386243115262376e-05, 1.421335847622808e-05, 1.457316947545814e-05, 1.494208903692303e-05, 1.532034774002196e-05, 1.570818200104552e-05, 1.610583422092437e-05, 1.65135529367173e-05, 1.693159297693378e-05, 1.736021562078764e-05, 1.779968876148146e-05, 1.825028707362396e-05, 1.871229218488431e-05, 1.91859928519914e-05, 1.967168514118734e-05, 2.016967261324827e-05, 2.068026651318802e-05, 2.120378596476278e-05, 2.174055816989885e-05, 2.229091861316749e-05, 2.28552112714348e-05, 2.343378882881789e-05, 2.402701289708065e-05, 2.463525424160804e-05, 2.525889301309896e-05, 2.589831898512292e-05, 2.655393179768906e-05, 2.722614120697864e-05, 2.791536734139838e-05, 2.862204096411334e-05, 2.934660374222388e-05, 3.008950852275491e-05, 3.085121961562892e-05, 3.163221308380059e-05, 3.243297704073344e-05, 3.325401195540434e-05, 3.409583096502669e-05, 3.495896019568677e-05, 3.584393909109424e-05, 3.675132074965173e-05, 3.768167227005358e-05, 3.863557510563036e-05, 3.9613625427659e-05, 4.061643449786701e-05, 4.164462905036176e-05, 4.269885168322427e-05, 4.377976126001165e-05, 4.488803332141799e-05, 4.60243605073523e-05, 4.718945298969512e-05, 4.838403891600515e-05, 4.960886486445245e-05, 5.086469631026131e-05, 5.215231810395534e-05, 5.347253496170183e-05, 5.482617196806215e-05, 5.621407509146185e-05, 5.763711171270097e-05, 5.90961711668359e-05, 6.059216529876993e-05, 
6.212602903289876e-05, 6.369872095716764e-05, 6.531122392190214e-05, 6.696454565378879e-05, 6.865971938538649e-05, 7.039780450056191e-05, 7.217988719625208e-05, 7.400708116096498e-05, 7.588052827044351e-05, 7.7801399300925e-05, 7.977089466044115e-05, 8.179024513861548e-05, 8.38607126754232e-05, 8.598359114939585e-05, 8.816020718575947e-05, 9.039192098501097e-05, 9.268012717245002e-05, 9.502625566919279e-05, 9.743177258521418e-05, 9.989818113497254e-05, 0.0001024270225761871, 0.0001050198771723555, 0.0001076783651796065, 0.0001104041478585077, 0.0001131989285114549, 0.0001160644535462892, 0.0001190025135668079, 0.0001220149444908409, 0.0001251036286965983, 0.000128270496197993, 0.0001315175258496771, 0.0001348467465825397, 0.0001382602386704314, 0.0001417601350289123, 0.0001453486225468209, 0.0001490279434515005, 0.0001528003967085303, 0.0001566683394568252, 0.0001606341884800116, 0.0001647004217149747, 0.0001688695797985326, 0.0001731442676531884, 0.0001775271561129463, 0.0001820209835902114, 0.0001866285577847933, 0.0001913527574360904, 0.0001961965341195348, 0.000201162914088412, 0.0002062550001622099, 0.000211475973662654, 0.0002168290963986438, 0.0002223177127013172, 0.0002279452515104997, 0.0002337152285138488, 0.0002396312483400006, 0.0002456970068070942, 0.0002519162932280638, 0.0002582929927741178, 0.0002648310888978909, 0.0002715346658177461, 0.0002784079110647849, 0.0002854551180941357, 0.0002926806889621303, 0.0003000891370710463, 0.0003076850899830878, 0.0003154732923053676, 0.0003234586086476664, 0.0003316460266547917, 0.0003400406601154305, 0.000348647752149395, 0.0003574726784752453, 0.0003665209507603083, 0.0003757982200551445, 0.0003853102803146136, 0.0003950630720076786, 0.0004050626858181989, 0.00041531536643899, 0.0004258275164614751, 0.0004366057003633559, 0.0004476566485967231, 0.0004589872617791546, 0.0004706046149903684, 0.0004825159621770674, 0.0004947287406687134, 0.0005072505758069681, 0.0005200892856916815, 0.0005332528860463306, 
0.0005467495952058806, 0.0005605878392301714, 0.0005747762571459184, 0.0005893237063205783, 0.0006042392679713659, 0.000619532252812776, 0.0006352122068461132, 0.0006512889172945222, 0.0006677724186871869, 0.0006846729990964105, 0.0007020012065313623, 0.000719767855492447, 0.0007379840336902388, 0.0007566611089331233, 0.000775810736187831, 0.000795444864817141, 0.0008155757459992206, 0.0008362159403330359, 0.0008573783256345265, 0.0008790761049282433, 0.0009013228146393056, 0.0009241323329906635, 0.0009475188886107301, 0.0009714970693566203, 0.0009960818313583288, 0.001021288508289302, 0.001047132820869032, 0.001073630886603381, 0.001100799229768527, 0.001128654791644552, 0.001157214941004806, 0.001186497484867386, 0.001216520679515169, 0.001247303241791018, 0.001278864360674945, 0.001311223709150128, 0.001344401456364933, 0.001378418280098154, 0.001413295379534955, 0.001449054488361098, 0.001485717888183264, 0.001523308422283458, 0.001561849509715633, 0.001601365159752933, 0.001641879986694081, 0.001683419225037668, 0.00172600874503333, 0.001769675068618938, 0.001814445385753231, 0.001860347571153449, 0.001907410201447812, 0.001955662572752892, 0.002005134718686133, 0.002055857428824065, 0.00210786226761696, 0.002161181593770897, 0.002215848580108545, 0.002271897233920117, 0.002329362417816285, 0.002388279871095102, 0.002448686231635174, 0.002510619058327758, 0.002574116854060511, 0.002639219089266158, 0.002705966226049438, 0.002774399742906053, 0.002844562160047707, 0.002916497065347496, 0.00299024914092035, 0.003065864190353482, 0.003143389166602094, 0.003222872200566002, 0.003304362630363084, 0.003387911031315864, 0.003473569246667859, 0.003561390419046609, 0.00365142902269082, 0.003743740896459187, 0.003838383277639051, 0.003935414836573252, 0.004034895712123962, 0.004136887547992718, 0.004241453529916137, 0.004348658423757335, 0.004458568614513293, 0.00457125214625896, 0.004686778763049194, 0.004805219950800051, 0.004926648980171393, 0.005051140950473163, 
0.005178772834617986, 0.005309623525143465, 0.005443773881327532, 0.005581306777421048, 0.005722307152022024, 0.005866862058616267, 0.006015060717309827, 0.006166994567778894, 0.006322757323463268, 0.006482445027030003, 0.006646156107134213, 0.006813991436504284, 0.006986054391379554, 0.007162450912328501, 0.007343289566476147, 0.007528681611169795, 0.007718741059112327, 0.007913584744993218, 0.008113332393647234, 0.008318106689771551, 0.008528033349232262, 0.008743241191991463, 0.00896386221668685, 0.009190031676895555, 0.009421888159114716, 0.009659573662491414, 0.009903233680334597, 0.01015301728344256, 0.01040907720527903, 0.01067156992903167, 0.01094065577658667, 0.01121649899945339, 0.01149926787167324, 0.01178913478474686, 0.01208627634461384, 0.01239087347071937, 0.0127031114972018, 0.01302318027623557, 0.01335127428356342, 0.01368759272625166, 0.01403233965270262, 0.01438572406495696, 0.01474796003331969, 0.01511926681334199, 0.01549986896519128, 0.01588999647544121, 0.01628988488131218, 0.01669977539739325, 0.01711991504487445, 0.0175505567833182, 0.01799195964499756, 0.0184443888718272, 0.01890811605491296, 0.0193834192767433, 0.01987058325604523, 0.02036989949532528, 0.02088166643111433, 0.02140618958693356, 0.02194378172899612, 0.02249476302465713, 0.02305946120362237, 0.02363821172192255, 0.02423135792865853, 0.02483925123551838, 0.02546225128906453, 0.02610072614578633, 0.02675505244990755, 0.02742561561393685, 0.0281128100019429, 0.02881703911553181, 0.02953871578249988, 0.03027826234812911, 0.03103611086908634, 0.03181270330988305, 0.03260849174184394, 0.03342393854452719, 0.03425951660953162, 0.03511570954661759, 0.0359930118920618, 0.03689192931915566, 0.03781297885074908, 0.03875668907373125, 0.03972360035532916, 0.04071426506109472, 0.04172924777443914, 0.04276912551756062, 0.04383448797359978, 0.04492593770984144, 0.04604409040176975, 0.04718957505776596, 0.04836303424422338, 0.04956512431083715, 0.05079651561580736, 0.0520578927506778, 
0.05334995476450988, 0.0546734153870725, 0.05602900325070652, 0.05741746211049749, 0.05883955106236998, 0.06029604475868686, 0.06178773362091405, 0.0633154240488807, 0.06487993862613586, 0.06648211632087331, 0.06812281268186172, 0.06980290002878449, 0.07152326763635843, 0.07328482191156144, 0.07508848656326288, 0.07693520276350758, 0.07882592929966228, 0.08076164271658959, 0.0827433374479652, 0.08477202593580999, 0.08684873873725309, 0.0889745246174916, 0.09115045062785757, 0.09337760216784169, 0.09565708302986899, 0.09799001542555344, 0.1003775399920957, 0.1028208157774225, 0.1053210202025896, 0.1078793489999048, 0.1104970161251454, 0.1131752536421676, 0.1159153115781252, 0.1187184577474252, 0.1215859775424678, 0.1245191736891183, 0.1275193659647721, 0.1305878908767723, 0.1337261012988392, 0.1369353660630715, 0.1402170695049668, 0.1435726109588037, 0.1470034042006124, 0.1505108768358444, 0.1540964696287357, 0.1577616357702336, 0.1615078400812305, 0.1653365581477273, 0.1692492753844071, 0.1732474860229822, 0.1773326920215278, 0.1815064018908882, 0.1857701294340989, 0.1901253923946259, 0.1945737110090868, 0.1991166064599693, 0.2037555992237241, 0.2084922073094667, 0.2133279443833714, 0.2182643177737138, 0.2233028263513652, 0.2284449582804087, 0.2336921886334186, 0.2390459768657987, 0.2445077641434711, 0.2500789705180657, 0.255760991943661, 0.2615551971290195, 0.2674629242191584, 0.2734854773000214, 0.2796241227199402, 0.2858800852215114, 0.2922545438774806, 0.298748627824189, 0.3053634117861433, 0.3120999113852747, 0.3189590782285002, 0.3259417947672633, 0.3330488689228198, 0.3402810284711792, 0.3476389151817552, 0.3551230787039943, 0.3627339701964892, 0.3704719356933676, 0.3783372092030993, 0.3863299055352408, 0.3944500128510921, 0.4026973849347593, 0.4110717331816751, 0.419572618302309, 0.4281994417395024, 0.4369514367986851, 0.4458276594911363, 0.4548269790914254, 0.4639480684112959, 0.4731893937934223, 0.4825492048298104, 0.4920255238110362, 0.5016161349140745, 
0.5113185731381839, 0.521130113000126, 0.5310477570020027, 0.5410682238871242, 0.5511879367016141, 0.5614030106819579, 0.5717092409913035, 0.5821020903301881, 0.592576676450367, 0.603127759603613, 0.6137497299608002, 0.6244365950401658, 0.6351819671874773, 0.6459790511548557, 0.6568206318292528, 0.6676990621660253, 0.6786062513877192, 0.6895336535130389, 0.700472256286062, 0.711412570581018, 0.722344620363424, 0.733257933293985, 0.7441415320674755, 0.7549839265847459, 0.7657731070620499, 0.7764965381880523, 0.7871411544450706, 0.79769335671736, 0.8081390103154705, 0.8184634445518734, 0.8286514540091291, 0.8386873016477328, 0.8485547239064536, 0.8582369379533104, 0.8677166512502901, 0.8769760735993973, 0.8859969318415144, 0.8947604873827717, 0.9032475567255507, 0.9114385351827141, 0.9193134239541103, 0.9268518607436024, 0.9340331540927322, 0.9408363216034709, 0.9472401322171191, 0.9532231527091829, 0.9587637985506972, 0.963840389274875, 0.9684312084738622, 0.9725145685335941, 0.976068880195079, 0.979072727007621, 0.9815049447133767, 0.9833447055729765, 0.9845716076085548, 0.9851657687032276, 0.985107925454653, 0.9843795366347109, 0.9829628910573522, 0.9808412196022702, 0.977998811083196, 0.9744211315862652, 0.9700949468361901, 0.9650084470759944, 0.9591513738700237, 0.9525151481601856, 0.9450929988222502, 0.9368800908830986, 0.9278736524716585, 0.9180730994867043, 0.907480156874598, 0.8960989753205361, 0.8839362420691458, 0.8710012845057488, 0.8573061650499781, 0.8428657658403298, 0.827697861623844, 0.8118231792114435, 0.7952654418190377, 0.7780513965898018, 0.7602108235867763, 0.7417765245601202, 0.7227842898327521, 0.7032728417149867, 0.6832837529561542, 0.6628613388720092, 0.6420525219541842, 0.6209066679745128, 0.5994753928453704, 0.5778123397891951, 0.5559729267075572, 0.5340140640233095, 0.5119938436983916, 0.4899712006035052, 0.4680055479318792, 0.4461563889035008, 0.4244829075932073, 0.4030435423280482, 0.3818955457268072, 0.3610945360852994, 
0.3406940454302627, 0.3207450701548793, 0.301295630689617, 0.2823903471296388, 0.2640700381079167, 0.2463713504416351, 0.2293264271561294, 0.2129626213702737, 0.1973022631729464, 0.1823624859932704, 0.1681551180287898, 0.1546866430071023, 0.1419582328811954, 0.1299658529641485, 0.1187004374679203, 0.1081481304049168, 0.09829058333184085, 0.08910529746939091, 0.08056599334192185, 0.07264298629336743, 0.0653035411193032, 0.05851217370948064, 0.05223086215328665, 0.04641912439276907, 0.04103996465795301, 0.03607823884173678, 0.0315232157824612, 0.02736269734499597, 0.02358309488123722, 0.02016951842594095, 0.01710587842531064, 0.01437499964444011, 0.01195874674168118, 0.009838160835294985, 0.007993606222794714, 0.00640492624917588, 0.005051607159575644, 0.003912948618537027, 0.00296823943520691, 0.002196936905157987, 0.001578848068847838, 0.001094311097011136, 0.0007243749479520223, 0.0004509754028871327, 0.0002571055749490125, 0.0001269790059414966, 4.618351149593414e-05, 1.824007998751931e-06, -1.734735066544202e-05, -2.081528797653887e-05, -1.62012143485879e-05, -9.180059468205659e-06, -3.415356443631223e-06, -5.137396031362762e-07, -5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 0 + }, + { + "i": 0, + "radial_function": [ + 1.811683488642281e-07, 1.857546421326063e-07, 1.904570376951573e-07, 1.952784746814707e-07, 2.002219666248395e-07, 2.052906033457614e-07, 2.1048755288312e-07, 2.158160634742528e-07, 2.212794655851405e-07, 2.268811739919927e-07, 2.326246899155217e-07, 2.385136032092469e-07, 2.445515946031932e-07, 2.507424380043849e-07, 2.570900028555773e-07, 2.635982565536913e-07, 2.702712669294737e-07, 2.77113204789924e-07, 2.841283465250791e-07, 2.913210767807897e-07, 2.986958911991477e-07, 3.062573992282898e-07, 3.140103270033249e-07, 3.219595203001879e-07, 3.301099475642685e-07, 3.384667030157005e-07, 3.470350098332616e-07, 3.558202234188659e-07, 3.648278347446919e-07, 3.740634737850407e-07, 3.835329130350594e-07, 3.932420711185412e-07, 4.031970164870472e-07, 4.134039712126648e-07, 4.238693148767755e-07, 4.345995885572539e-07, 4.456014989165989e-07, 4.568819223935471e-07, 4.684479095007859e-07, 4.803066892314587e-07, 4.924656735772032e-07, 5.049324621605606e-07, 
5.177148469846399e-07, 5.308208173030095e-07, 5.442585646128613e-07, 5.58036487774555e-07, 5.721631982607596e-07, 5.866475255384594e-07, 6.014985225871906e-07, 6.167254715569578e-07, 6.323378895693605e-07, 6.483455346655644e-07, 6.647584119048209e-07, 6.815867796173542e-07, 6.988411558155278e-07, 7.165323247672706e-07, 7.34671343735905e-07, 7.532695498905622e-07, 7.723385673915086e-07, 7.918903146548208e-07, 8.119370118009239e-07, 8.324911882916733e-07, 8.535656907607353e-07, 8.751736910421588e-07, 8.97328694402167e-07, 9.200445479792851e-07, 9.433354494381105e-07, 9.672159558421034e-07, 9.917009927509542e-07, 1.016805863548214e-06, 1.042546259004994e-06, 1.06893826708574e-06, 1.095998383002179e-06, 1.123743519521727e-06, 1.152191017536819e-06, 1.18135865690171e-06, 1.211264667543579e-06, 1.241927740854828e-06, 1.273367041373721e-06, 1.305602218760643e-06, 1.338653420077452e-06, 1.372541302377627e-06, 1.407287045615032e-06, 1.442912365879388e-06, 1.479439528966717e-06, 1.516891364293203e-06, 1.555291279161205e-06, 1.594663273386292e-06, 1.635031954294455e-06, 1.67642255209887e-06, 1.718860935665763e-06, 1.762373628679293e-06, 1.806987826215504e-06, 1.852731411735684e-06, 1.899632974509794e-06, 1.947721827480771e-06, 1.99702802558093e-06, 2.047582384511855e-06, 2.099416499999523e-06, 2.152562767536682e-06, 2.207054402624778e-06, 2.262925461528125e-06, 2.320210862553216e-06, 2.378946407866486e-06, 2.439168805864169e-06, 2.500915694108123e-06, 2.564225662842062e-06, 2.629138279102779e-06, 2.695694111441437e-06, 2.763934755270384e-06, 2.833902858851239e-06, 2.905642149940548e-06, 2.979197463109591e-06, 3.054614767755388e-06, 3.131941196820412e-06, 3.211225076238869e-06, 3.292515955127977e-06, 3.37586463674307e-06, 3.461323210215791e-06, 3.548945083095247e-06, 3.638785014712322e-06, 3.730899150388094e-06, 3.825345056507561e-06, 3.922181756480639e-06, 4.02146976761284e-06, 4.123271138908551e-06, 4.22764948983063e-06, 4.334670050040348e-06, 4.444399700142531e-06, 
4.556907013461313e-06, 4.672262298872457e-06, 4.790537644719066e-06, 4.911806963837964e-06, 5.036146039724862e-06, 5.163632573867088e-06, 5.294346234273293e-06, 5.428368705230496e-06, 5.565783738319418e-06, 5.706677204719841e-06, 5.851137148838718e-06, 5.999253843294232e-06, 6.151119845290268e-06, 6.306830054416313e-06, 6.466481771908715e-06, 6.630174761410409e-06, 6.798011311266661e-06, 6.970096298395891e-06, 7.146537253775184e-06, 7.327444429581251e-06, 7.512930868028785e-06, 7.703112471948771e-06, 7.898108077151008e-06, 8.098039526615616e-06, 8.303031746559786e-06, 8.51321282442717e-06, 8.728714088848191e-06, 8.949670191621263e-06, 9.17621919176581e-06, 9.408502641699222e-06, 9.646665675591603e-06, 9.890857099952848e-06, 1.014122948650879e-05, 1.039793926742379e-05, 1.066114683292904e-05, 1.09310166314175e-05, 1.120771727206694e-05, 1.149142163005588e-05, 1.178230695443664e-05, 1.208055497873329e-05, 1.238635203433294e-05, 1.269988916674035e-05, 1.302136225476877e-05, 1.335097213274014e-05, 1.36889247157711e-05, 1.403543112822219e-05, 1.439070783538969e-05, 1.475497677852262e-05, 1.512846551324722e-05, 1.551140735148599e-05, 1.590404150695856e-05, 1.630661324435425e-05, 1.671937403226978e-05, 1.714258170000536e-05, 1.75765005983175e-05, 1.802140176422736e-05, 1.847756308998631e-05, 1.894526949630452e-05, 1.942481310994779e-05, 1.991649344581411e-05, 2.042061759360161e-05, 2.093750040918298e-05, 2.146746471080582e-05, 2.201084148023836e-05, 2.256797006898598e-05, 2.31391984097056e-05, 2.37248832329473e-05, 2.432539028935877e-05, 2.494109457748736e-05, 2.557238057732183e-05, 2.621964248971707e-05, 2.688328448184834e-05, 2.75637209388482e-05, 2.826137672177852e-05, 2.897668743209806e-05, 2.97100996827876e-05, 3.046207137629841e-05, 3.123307198949689e-05, 3.202358286577773e-05, 3.283409751452672e-05, 3.366512191811675e-05, 3.451717484662353e-05, 3.539078818045688e-05, 3.62865072411019e-05, 3.720489113017501e-05, 3.814651307700137e-05, 3.911196079492538e-05, 
4.010183684657438e-05, 4.11167590182957e-05, 4.215736070399796e-05, 4.322429129863007e-05, 4.431821660153667e-05, 4.54398192299386e-05, 4.658979904278661e-05, 4.776887357524915e-05, 4.897777848409744e-05, 5.021726800425695e-05, 5.148811541680624e-05, 5.279111352870254e-05, 5.412707516452844e-05, 5.549683367055663e-05, 5.690124343143586e-05, 5.834118039981477e-05, 5.981754263921895e-05, 6.133125088051226e-05, 6.288324909227768e-05, 6.447450506545867e-05, 6.610601101261833e-05, 6.777878418217079e-05, 6.949386748795845e-05, 7.125233015455178e-05, 7.305526837865588e-05, 7.490380600702626e-05, 7.679909523129165e-05, 7.874231730010587e-05, 8.073468324904992e-05, 8.277743464872064e-05, 8.487184437145234e-05, 8.701921737712528e-05, 8.922089151853002e-05, 9.147823836676457e-05, 9.379266405715167e-05, 9.616561015618015e-05, 9.859855454997816e-05, 0.0001010930123548459, 0.0001036505368503826, 0.0001062727204357549, 0.0001089611956096714, 0.0001117176359746344, 0.0001145437572660579, 0.0001174413184068532, 0.0001204121225880953, 0.0001234580183764008, 0.0001265809008486585, 0.0001297827127547721, 0.0001330654457090864, 0.0001364311414111851, 0.0001398818928967634, 0.0001434198458192943, 0.0001470471997632228, 0.0001507662095894413, 0.0001545791868138113, 0.0001584885010195183, 0.0001624965813040606, 0.0001666059177616931, 0.0001708190630021614, 0.000175138633706583, 0.0001795673122213507, 0.0001841078481909468, 0.0001887630602305841, 0.0001935358376396037, 0.0001984291421565772, 0.0002034460097570889, 0.0002085895524951834, 0.0002138629603894947, 0.0002192695033550865, 0.0002248125331820554, 0.0002304954855619766, 0.0002363218821632809, 0.0002422953327566883, 0.0002484195373918355, 0.0002546982886262552, 0.0002611354738079042, 0.0002677350774124335, 0.0002745011834364444, 0.000281437977847979, 0.0002885497510955214, 0.0002958409006768176, 0.0003033159337688315, 0.0003109794699201944, 0.0003188362438075154, 0.0003268911080569519, 0.0003351490361324652, 0.0003436151252922005, 
0.0003522945996144697, 0.0003611928130948267, 0.0003703152528157533, 0.0003796675421905039, 0.0003892554442826702, 0.0003990848652030632, 0.0004091618575855216, 0.0004194926241432887, 0.0004300835213076197, 0.0004409410629502998, 0.0004520719241917849, 0.0004634829452966945, 0.0004751811356583987, 0.0004871736778744852, 0.0004994679319148836, 0.0005120714393844691, 0.0005249919278819747, 0.0005382373154570512, 0.0005518157151673538, 0.0005657354397375263, 0.0005800050063219831, 0.0005946331413733974, 0.0006096287856188233, 0.0006250010991453653, 0.0006407594665973608, 0.0006569135024870069, 0.0006734730566203882, 0.0006904482196408714, 0.0007078493286918001, 0.0007256869732004782, 0.0007439720007853692, 0.0007627155232884624, 0.0007819289229347511, 0.0008016238586207231, 0.0008218122723337973, 0.0008425063957045651, 0.0008637187566937169, 0.0008854621864154853, 0.0009077498260993828, 0.0009305951341920364, 0.0009540118936008067, 0.0009780142190808823, 0.00100261656476747, 0.001027833731854622, 0.001053680876422221, 0.00108017351741253, 0.001107327544757649, 0.001135159227659138, 0.00116368522302095, 0.00119292258403675, 0.001222888768932536, 0.001253601649865389, 0.001285079521979041, 0.001317341112616766, 0.001350405590692015, 0.001384292576216973, 0.001419022149989089, 0.001454614863435404, 0.001491091748614283, 0.001528474328373995, 0.001566784626667257, 0.001606045179020656, 0.001646279043157594, 0.001687509809773032, 0.001729761613458122, 0.001773059143772334, 0.001817427656460452, 0.00186289298481136, 0.00190948155115513, 0.001957220378494563, 0.002006137102266791, 0.002056259982230108, 0.002107617914470697, 0.002160240443523272, 0.002214157774599247, 0.002269400785915246, 0.002326001041114226, 0.002383990801770779, 0.002443403039971372, 0.002504271450959631, 0.002566630465835868, 0.002630515264299182, 0.002695961787419623, 0.00276300675042687, 0.002831687655500842, 0.00290204280454872, 0.002974111311951516, 0.003047933117262314, 0.003123548997836934, 
0.003201000581376459, 0.003280330358359736, 0.003361581694342371, 0.003444798842097286, 0.003530026953570237, 0.003617312091621912, 0.00370670124152656, 0.003798242322195092, 0.003891984197088606, 0.003987976684786286, 0.004086270569169266, 0.004186917609179948, 0.004289970548113637, 0.004395483122396951, 0.004503510069804774, 0.004614107137064618, 0.004727331086794575, 0.004843239703717688, 0.004961891800092574, 0.005083347220296736, 0.005207666844495303, 0.005334912591324646, 0.005465147419516012, 0.00559843532838074, 0.005734841357074102, 0.005874431582550577, 0.00601727311611887, 0.006163434098500053, 0.006312983693287348, 0.006465992078700916, 0.006622530437525432, 0.006782670945112924, 0.006946486755327198, 0.007114051984300263, 0.007285441691864836, 0.007460731860520275, 0.00763999937178274, 0.007823321979763076, 0.008010778281808567, 0.008202447686037355, 0.008398410375585919, 0.008598747269382485, 0.00880353997925028, 0.009012870763135932, 0.009226822474249621, 0.009445478505893582, 0.009668922731746856, 0.009897239441363483, 0.01013051327063172, 0.01036882912693111, 0.01061227210871348, 0.01086092741922329, 0.01111488027406097, 0.01137421580228162, 0.01163901894070977, 0.01190937432113843, 0.01218536615006888, 0.01246707808063509, 0.01275459307634355, 0.0130479932662474, 0.01334735979115981, 0.01365277264049934, 0.01396431047934653, 0.01428205046527781, 0.01460606805453012, 0.01493643679703586, 0.0152732281198558, 0.01561651109852416, 0.0159663522158078, 0.01632281510736951, 0.01668596029381315, 0.01705584489857786, 0.01743252235113719, 0.01781604207494939, 0.01820644915959621, 0.01860378401653845, 0.01900808201791068, 0.01941937311777054, 0.01983768145521415, 0.0202630249387661, 0.02069541481145107, 0.02113485519595559, 0.02158134261929065, 0.02203486551637203, 0.02249540371194279, 0.0229629278802736, 0.02343739898209125, 0.02391876767820311, 0.02440697371930727, 0.0249019453115046, 0.02540359845705861, 0.02591183626998651, 0.02642654826610418, 
0.02694760962719682, 0.02747488043903958, 0.0280082049030533, 0.02854741052144948, 0.02909230725579376, 0.02964268665900288, 0.03019832098088431, 0.03075896224743133, 0.03132434131420257, 0.03189416689424027, 0.03246812456112061, 0.0330458757278811, 0.03362705660273477, 0.03421127712266185, 0.03479811986616414, 0.03538713894667932, 0.03597785888838238, 0.03656977348634697, 0.03716234465330819, 0.0377550012555531, 0.03834713794077369, 0.03893811396104689, 0.03952725199445767, 0.04011383696925948, 0.0406971148948651, 0.04127629170438928, 0.04185053211391646, 0.04241895850414622, 0.04298064983057793, 0.04353464056892938, 0.04407991970304952, 0.04461542976317711, 0.04514006592301777, 0.04565267516476259, 0.04615205552184798, 0.04663695540996201, 0.04710607305753431, 0.04755805604770251, 0.04799150098452967, 0.0484049532970474, 0.04879690719552074, 0.04916580579516634, 0.04951004142340324, 0.04982795612757169, 0.0501178424009124, 0.05037794414545427, 0.05060645789130212, 0.05080153429264336, 0.05096127992159329, 0.05108375938176248, 0.05116699776414818, 0.05120898346861024, 0.05120767141477844, 0.05116098666673768, 0.05106682849623353, 0.05092307490941509, 0.05072758766226639, 0.05047821778984967, 0.05017281167427046, 0.04980921767585283, 0.04938529335135393, 0.04889891328212336, 0.04834797753389546, 0.04773042076835617, 0.04704422202471976, 0.04628741518724837, 0.0454581001519087, 0.04455445470215259, 0.0435747471000852, 0.04251734939501168, 0.04138075144648438, 0.04016357565347069, 0.0388645923750834, 0.03748273602141988, 0.03601712178541082, 0.03446706297814112, 0.03283208892084779, 0.03111196333669407, 0.02930670317443796, 0.02741659778425355, 0.02544222835320688, 0.02338448749424318, 0.02124459886803294, 0.01902413670166114, 0.01672504505200112, 0.01434965664473012, 0.01190071110242476, 0.00938137235712291, 0.006795245024290148, 0.004146389496461896, 0.001439335496135452, -0.001320906190996035, -0.004128834099362797, -0.006978448897476574, -0.009863248962613112, 
-0.01277622906072953, -0.01570988248849705, -0.01865620704412227, -0.02160671520029945, -0.02455244885551883, -0.02748399903840402, -0.03039153093301119, -0.03326481458030811, -0.03609326159155082, -0.0388659681820834, -0.04157176479833567, -0.04419927256554138, -0.04673696672801592, -0.04917324718682429, -0.05149651616042147, -0.05369526290156581, -0.05575815529773662, -0.05767413806182078, -0.05943253708452999, -0.06102316936960847, -0.06243645780742456, -0.06366354986232438, -0.06469643905486881, -0.06552808791292254, -0.06615255084716531, -0.06656509517919851, -0.06676231831692497, -0.06674225883593038, -0.06650449899166094, -0.0660502559606156, -0.0653824588958783, -0.06450580869042472, -0.06342681717914109, -0.0621538223868528, -0.06069697635543343, -0.05906820206982818, -0.05728111606316097, -0.05535091342840272, -0.05329421221245471, -0.05112885453247411, -0.0488736622484688, -0.04654814566498267, -0.04417216453166078, -0.04176554157983021, -0.03934762998011158, -0.03693683744172233, -0.03455011120098939, -0.03220238986321958, -0.02990602996103832, -0.02767021715890563, -0.02550037424449494, -0.02339758036976967, -0.0213607344749504, -0.01939587263724182, -0.0175106583722024, -0.01571198945860144, -0.01400592650444581, -0.01239762882594168, -0.01089129893101607, -0.00949013686438742, -0.008196305612559589, -0.007010908683900797, -0.00593398087111845, -0.004964493071626379, -0.004100371887069902, -0.003338534548884539, -0.002674939525461148, -0.002104652962311979, -0.001621930894281642, -0.001220316953836002, -0.0008927550874812396, -0.0006317165894017054, -0.0004293405730984509, -0.0002775868330660639, -0.0001683999031149505, -9.388299759444108e-05, -4.648042582787448e-05, -1.916699465105698e-05, -5.642851203149779e-06, -5.321558813747851e-07, 4.161036486760583e-07, 1.269646645457589e-07, 4.163336342344337e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 1 + }, + { + "i": 0, + "radial_function": [ + 6.951732417740043e-11, 7.217371890824069e-11, 7.493161971856415e-11, 7.77949053539839e-11, 8.07676027744457e-11, 8.38538928177746e-11, 8.705811607963674e-11, 9.038477901818537e-11, 9.383856029197448e-11, 9.742431734005815e-11, 1.011470932135239e-10, 1.050121236680734e-10, 
1.090248445276219e-10, 1.13190899329274e-10, 1.175161472604285e-10, 1.220066713991708e-10, 1.266687872695478e-10, 1.315090517237535e-10, 1.365342721637174e-10, 1.417515161150686e-10, 1.471681211669322e-10, 1.527917052915471e-10, 1.586301775582104e-10, 1.646917492566224e-10, 1.709849454452712e-10, 1.775186169410999e-10, 1.843019527673212e-10, 1.913444930768805e-10, 1.986561425697485e-10, 2.06247184422909e-10, 2.141282947526328e-10, 2.223105576293808e-10, 2.308054806664491e-10, 2.396250112042807e-10, 2.487815531132077e-10, 2.582879842382486e-10, 2.681576745105009e-10, 2.784045047505992e-10, 2.890428861906784e-10, 3.00087780742307e-10, 3.115547220388802e-10, 3.23459837282082e-10, 3.358198699231294e-10, 3.486522032106997e-10, 3.619748846386647e-10, 3.758066513279957e-10, 3.901669563785601e-10, 4.050759962278502e-10, 4.205547390551304e-10, 4.366249542709466e-10, 4.533092431334643e-10, 4.706310705347078e-10, 4.886147980013868e-10, 5.072857179567293e-10, 5.266700892915136e-10, 5.46795174294295e-10, 5.676892769928032e-10, 5.893817829604027e-10, 6.119032006436073e-10, 6.352852042687767e-10, 6.595606783883157e-10, 6.847637641290464e-10, 7.109299072077823e-10, 7.380959077816279e-10, 7.662999722031224e-10, 7.955817667529878e-10, 8.259824734260791e-10, 8.575448478489646e-10, 8.903132794105939e-10, 9.24333853690633e-10, 9.596544172732214e-10, 9.963246450373464e-10, 1.034396110018434e-09, 1.073922355939408e-09, 1.114958972513245e-09, 1.157563673622867e-09, 1.201796378488382e-09, 1.247719295935767e-09, 1.295397011885536e-09, 1.344896580184402e-09, 1.396287616907665e-09, 1.449642398264985e-09, 1.505035962247165e-09, 1.56254621415693e-09, 1.622254036172104e-09, 1.684243401095209e-09, 1.74860149044954e-09, 1.815418817087702e-09, 1.884789352485084e-09, 1.956810658897269e-09, 2.031584026567174e-09, 2.109214616174962e-09, 2.18981160673097e-09, 2.273488349119637e-09, 2.360362525510407e-09, 2.450556314859673e-09, 2.5441965647366e-09, 2.641414969714396e-09, 2.74234825657788e-09, 
2.84713837660786e-09, 2.955932705212567e-09, 3.068884249187054e-09, 3.186151861891865e-09, 3.30790046665359e-09, 3.434301288701545e-09, 3.565532095966496e-09, 3.701777449080323e-09, 3.843228960927917e-09, 3.990085566116429e-09, 4.14255380074075e-09, 4.300848092838534e-09, 4.465191063943328e-09, 4.635813842159724e-09, 4.81295638720079e-09, 4.996867827844874e-09, 5.187806812286116e-09, 5.386041871871525e-09, 5.591851798735925e-09, 5.805526037865755e-09, 6.027365094143127e-09, 6.257680954942228e-09, 6.496797528872578e-09, 6.745051101285766e-09, 7.0027908071863e-09, 7.270379122211462e-09, 7.548192372370352e-09, 7.836621263259108e-09, 8.136071429496118e-09, 8.446964005149852e-09, 8.769736215961413e-09, 9.104841994194106e-09, 9.452752616974983e-09, 9.813957369025443e-09, 1.01889642307128e-08, 1.057830059239029e-08, 1.098251399602944e-08, 1.14021729051879e-08, 1.183786750439501e-08, 1.229021052907888e-08, 1.275983812720205e-08, 1.324741075381648e-08, 1.375361409979619e-08, 1.427916005605264e-08, 1.482478771458852e-08, 1.539126440779745e-08, 1.597938678746998e-08, 1.658998194502355e-08, 1.722390857453053e-08, 1.788205818017937e-08, 1.856535632986666e-08, 1.927476395668121e-08, 2.001127871011097e-08, 2.077593635887091e-08, 2.156981224732443e-08, 2.239402280754553e-08, 2.324972712914609e-08, 2.413812858907618e-08, 2.506047654368725e-08, 2.601806808543646e-08, 2.701224986670194e-08, 2.804441999327098e-08, 2.911602999016402e-08, 3.022858684255593e-08, 3.138365511466348e-08, 3.258285914957736e-08, 3.382788535312831e-08, 3.512048456500054e-08, 3.646247452042022e-08, 3.785574240588192e-08, 3.930224751250321e-08, 4.080402399073425e-08, 4.236318371029732e-08, 4.398191922937014e-08, 4.566250687718875e-08, 4.740730995440006e-08, 4.921878205565868e-08, 5.109947051914191e-08, 5.305202000782179e-08, 5.507917622753245e-08, 5.718378978705333e-08, 5.936882020562943e-08, 6.163734007356426e-08, 6.399253937172277e-08, 6.643772995601864e-08, 6.897635021318221e-08, 7.161196989434714e-08, 
7.434829513325177e-08, 7.718917365609487e-08, 8.01386001903701e-08, 8.320072208027395e-08, 8.63798451165695e-08, 8.968043958910274e-08, 9.310714657045992e-08, 9.666478443959867e-08, 1.003583556546113e-07, 1.041930537841251e-07, 1.081742708072239e-07, 1.123076046921273e-07, 1.165988672642781e-07, 1.21054092374881e-07, 1.256795443813552e-07, 1.304817269516183e-07, 1.354673922045445e-07, 1.406435501994412e-07, 1.460174787878578e-07, 1.515967338415483e-07, 1.573891598709585e-07, 1.6340290104912e-07, 1.696464126564356e-07, 1.761284729624127e-07, 1.828581955609999e-07, 1.898450421768965e-07, 1.970988359606709e-07, 2.046297752915003e-07, 2.124484481067906e-07, 2.205658467788015e-07, 2.289933835591429e-07, 2.377429066128172e-07, 2.468267166642652e-07, 2.562575842788156e-07, 2.660487678036981e-07, 2.762140319938167e-07, 2.867676673483849e-07, 2.977245101855912e-07, 3.090999634832775e-07, 3.209100185150688e-07, 3.331712773120332e-07, 3.45900975981586e-07, 3.591170089161236e-07, 3.728379539254053e-07, 3.870830983278915e-07, 4.018724660375967e-07, 4.172268456842748e-07, 4.331678198065578e-07, 4.497177951587124e-07, 4.669000341735803e-07, 4.847386876256815e-07, 5.032588285402774e-07, 5.224864873958314e-07, 5.424486886690911e-07, 5.631734887739953e-07, 5.846900154476475e-07, 6.070285086380541e-07, 6.302203629508773e-07, 6.542981717157627e-07, 6.792957727316053e-07, 7.052482957567073e-07, 7.321922118095685e-07, 7.60165384348809e-07, 7.892071224047421e-07, 8.193582357352968e-07, 8.506610920852051e-07, 8.831596766273558e-07, 9.168996536691189e-07, 9.51928430710768e-07, 9.882952249445497e-07, 1.026051132288092e-06, 1.065249199047396e-06, 1.10594449631072e-06, 1.148194197176049e-06, 1.192057656920635e-06, 1.237596496223927e-06, 1.284874687560177e-06, 1.333958644881932e-06, 1.384917316716389e-06, 1.43782228280982e-06, 1.492747854448526e-06, 1.549771178600918e-06, 1.60897234602239e-06, 1.670434503477097e-06, 1.734243970227021e-06, 1.800490358958397e-06, 1.869266701305788e-06, 
1.940669578153763e-06, 2.014799254894922e-06, 2.09175982183366e-06, 2.171659339928187e-06, 2.254609992076466e-06, 2.340728240154616e-06, 2.430134988024742e-06, 2.522955750740522e-06, 2.619320830183602e-06, 2.719365497374937e-06, 2.823230181714224e-06, 2.93106066740854e-06, 3.04300829736344e-06, 3.159230184818292e-06, 3.279889433017716e-06, 3.405155363227302e-06, 3.53520375140013e-06, 3.670217073831684e-06, 3.810384762137306e-06, 3.955903467901036e-06, 4.106977337368512e-06, 4.263818296557792e-06, 4.426646347182503e-06, 4.595689873796232e-06, 4.771185962579772e-06, 4.953380732210459e-06, 5.142529677271234e-06, 5.338898024667217e-06, 5.542761103544475e-06, 5.754404729215827e-06, 5.974125601621774e-06, 6.202231718874839e-06, 6.439042806452228e-06, 6.684890762624964e-06, 6.940120120734993e-06, 7.205088528952065e-06, 7.480167248167955e-06, 7.76574166870104e-06, 8.06221184653343e-06, 8.36999305978811e-06, 8.689516386230206e-06, 9.02122930255915e-06, 9.365596306318357e-06, 9.72309956126624e-06, 1.009423956708063e-05, 1.047953585431336e-05, 1.087952770553142e-05, 1.129477490362706e-05, 1.172585850830065e-05, 1.217338166178069e-05, 1.263797042485561e-05, 1.312027464435483e-05, 1.362096885324217e-05, 1.414075320453911e-05, 1.468035444033228e-05, 1.524052689716235e-05, 1.582205354915334e-05, 1.642574709027072e-05, 1.705245105715934e-05, 1.770304099407015e-05, 1.837842566141842e-05, 1.907954828959124e-05, 1.980738787967918e-05, 2.056296055284298e-05, 2.134732095013637e-05, 2.216156368459949e-05, 2.300682484759197e-05, 2.388428357130911e-05, 2.479516364958105e-05, 2.574073521906024e-05, 2.672231650302462e-05, 2.774127562007787e-05, 2.879903246011547e-05, 2.989706063000441e-05, 3.103688947152977e-05, 3.222010615421107e-05, 3.344835784573516e-05, 3.472335396279361e-05, 3.60468685052543e-05, 3.742074247666896e-05, 3.884688639424325e-05, 4.032728289148104e-05, 4.186398941684991e-05, 4.345914103190815e-05, 4.511495331246804e-05, 4.683372535647465e-05, 4.861784290242116e-05, 
5.046978156224735e-05, 5.239211017277789e-05, 5.438749426993302e-05, 5.645869969005283e-05, 5.860859630284221e-05, 6.084016188056131e-05, 6.315648610829526e-05, 6.556077474022617e-05, 6.805635390704987e-05, 7.064667457981355e-05, 7.33353171956336e-05, 7.6125996450922e-05, 7.902256626793603e-05, 8.202902494065844e-05, 8.514952046616715e-05, 8.838835606791334e-05, 9.174999591745485e-05, 9.523907106144388e-05, 9.886038556086523e-05, 0.0001026189228497269, 0.0001065198523206166, 0.0001105685361447971, 0.0001147705363346891, 0.0001191316220568709, 0.0001236577772039265, 0.0001283552082337373, 0.0001332303522850512, 0.0001382898855783973, 0.0001435407321117116, 0.0001489900726602561, 0.0001546453540906759, 0.0001605142989993601, 0.0001666049156854535, 0.0001729255084692322, 0.0001794846883667449, 0.0001862913841319823, 0.0001933548536780338, 0.0002006846958890373, 0.0002082908628349861, 0.0002161836724017234, 0.0002243738213487561, 0.0002328723988078201, 0.000241690900235347, 0.0002508412418323385, 0.0002603357754453582, 0.0002701873039626852, 0.000280409097219888, 0.0002910149084293557, 0.0003020189911485917, 0.0003134361168022925, 0.0003252815927734831, 0.0003375712810792079, 0.0003503216176464476, 0.0003635496322041939, 0.0003772729688077195, 0.0003915099070112919, 0.0004062793837056829, 0.0004216010156369616, 0.000437495122623143, 0.0004539827514852998, 0.0004710857007098349, 0.0004888265458584998, 0.0005072286657428418, 0.0005263162693795392, 0.0005461144237430494, 0.0005666490823317729, 0.0005879471145637111, 0.0006100363360173047, 0.0006329455395328105, 0.000656704527189091, 0.0006813441431702914, 0.0007068963075362088, 0.0007333940509095653, 0.000760871550092621, 0.0007893641646247167, 0.0008189084742913825, 0.0008495423175945961, 0.0008813048311925086, 0.0009142364903157808, 0.0009483791501660337, 0.0009837760883004516, 0.00102047204800471, 0.001058513282654505, 0.001097947601063813, 0.001138824413815723, 0.001181194780569087, 0.00122511145833159, 
0.001270628950686742, 0.001317803557959182, 0.00136669342829911, 0.001417358609662919, 0.001469861102663032, 0.001524264914255551, 0.001580636112229653, 0.001639042880457542, 0.001699555574858369, 0.001762246780023709, 0.001827191366445901, 0.001894466548283947, 0.001964151941594472, 0.002036329622947694, 0.00211108418834019, 0.002188502812307548, 0.002268675307130997, 0.002351694182022043, 0.002437654702158936, 0.002526654947437714, 0.002618795870788811, 0.002714181355897946, 0.002812918274156773, 0.002915116540654994, 0.003020889169010984, 0.003130352324822401, 0.003243625377502272, 0.003360830950248498, 0.003482094967876971, 0.00360754670222947, 0.003737318814847394, 0.00387154739658187, 0.004010372003788573, 0.00415393569073283, 0.004302385037806826, 0.004455870175135344, 0.004614544801120998, 0.004778566195452413, 0.00494809522607076, 0.005123296349561046, 0.005304337604403803, 0.005491390596492094, 0.005684630476285799, 0.005884235906942268, 0.006090389022727767, 0.006303275376979018, 0.00652308387884815, 0.006750006718027323, 0.006984239276611723, 0.007225980027221641, 0.007475430416465195, 0.0077327947327848, 0.007998279957690767, 0.008272095599346213, 0.00855445350742853, 0.008845567668153347, 0.009145653978309484, 0.009454929997115297, 0.009773614674671183, 0.01010192805574836, 0.01044009095762069, 0.01078832462061699, 0.01114685033004202, 0.0115158890080911, 0.01189566077436167, 0.01228638447354836, 0.01268827716889755, 0.01310155359998974, 0.0135264256034195, 0.01396310149494961, 0.01441178541173022, 0.01487267661319952, 0.01534596873931431, 0.01583184902480402, 0.01633049746819752, 0.01684208595444099, 0.01736677733000829, 0.01790472442950339, 0.01845606905286934, 0.0190209408924512, 0.01959945640931232, 0.02019171765837664, 0.02079781106216458, 0.02141780613310911, 0.02205175414468298, 0.02269968675183888, 0.02336161456156416, 0.02403752565468074, 0.02472738406038167, 0.02543112818538956, 0.02614866920004901, 0.02687988938412999, 0.02762464043561665, 
0.02838274174629547, 0.02915397864853072, 0.02993810063823012, 0.03073481957965788, 0.03154380789844401, 0.03236469676987055, 0.03319707431028525, 0.03404048378029709, 0.0348944218092509, 0.0357583366513488, 0.03663162648468823, 0.03751363776541369, 0.03840366365012446, 0.03930094250064443, 0.04020465648622798, 0.04111393029924684, 0.04202783000136481, 0.04294536201814962, 0.04386547230098628, 0.04478704567602629, 0.0457089054007225, 0.04662981294924332, 0.04754846804871143, 0.04846350898876341, 0.04937351322734139, 0.05027699831590242, 0.05117242316732843, 0.05205818968972326, 0.05293264480896694, 0.0537940829023305, 0.05464074866461597, 0.05547084042714394, 0.05628251394843764, 0.05707388669362373, 0.057843042617353, 0.05858803746241852, 0.05930690458318774, 0.05999766129944628, 0.06065831578226086, 0.06128687446898649, 0.06188134999956549, 0.06243976966078899, 0.06296018431921573, 0.06344067781698974, 0.06387937679788096, 0.06427446092353119, 0.06462417343216528, 0.06492683198398139, 0.06518083972914111, 0.06538469652582442, 0.06553701022730299, 0.06563650794853969, 0.06568204721457735, 0.06567262688509404, 0.06560739774214305, 0.06548567262145229, 0.06530693596192409, 0.06507085264336594, 0.0647772759792061, 0.06442625472922596, 0.06401803899738487, 0.06355308488182301, 0.0630320577482933, 0.06245583400473811, 0.06182550126361569, 0.06114235678994654, 0.06040790414689564, 0.0596238479669417, 0.05879208679513225, 0.05791470397129249, 0.05699395653992299, 0.05603226219932967, 0.05503218432455909, 0.05399641512107835, 0.05292775698678993, 0.05182910217766448, 0.05070341088561835, 0.04955368784468334, 0.04838295758132803, 0.04719423841519851, 0.04599051529571299, 0.04477471152605937, 0.04354965937750482, 0.04231806953205035, 0.0410824992092326, 0.03984531873266969, 0.03860867617385743, 0.03737445957572544, 0.03614425610866692, 0.0349193073506508, 0.03370045971571326, 0.03248810888861066, 0.03128267614799823, 0.03008634522652168, 0.02890172728658835, 
0.02773134643000866, 0.02657761939000883, 0.02544283514474729, 0.02432913470974138, 0.02323849140043915, 0.02217269189115831, 0.02113331843243349, 0.02012173262504191, 0.01913906118513445, 0.01818618417042137, 0.01726372617158453, 0.01637205100524808, 0.01551126047409768, 0.01468119778511681, 0.01388145623736164, 0.01311139380505508, 0.0123701542487766, 0.01165669538581371, 0.01096982513886699, 0.01030824595875995, 0.009670608180021772, 0.009055572816578511, 0.00846188423669128, 0.007888453070119289, 0.007334449594718545, 0.006799407722876599, 0.006283339559021328, 0.00578686032682324, 0.005311061378834164, 0.004856518507434543, 0.004423545601864837, 0.004012374856587397, 0.003623155425592411, 0.003255952524155892, 0.002910747001714314, 0.002587435407215732, 0.002285830565668334, 0.002005662681640458, 0.001746580982127072, 0.00150815590753442, 0.001289881855506408, 0.001091180477946208, 0.0009114045268888149, 0.0007498422398353466, 0.000605722249827273, 0.0004782189999023301, 0.0003664586356904032, 0.0002695253438258216, 0.000186468097594622, 0.0001163077649219274, 5.804452747243394e-05, 1.066555341740216e-05, -2.684713957926133e-05, -5.550870027164201e-05, -7.63234129331769e-05, -9.027669851169318e-05, -9.832723050925973e-05, -0.0001013992511698092, -0.000100375175460965, -9.608857110458835e-05, -8.931760247619935e-05, -8.07790244704426e-05, -7.112280938921522e-05, -6.092748552780081e-05, -5.06962604181066e-05, -4.085399466401129e-05, -3.174508402435661e-05, -2.363229792139154e-05, -1.669661197276767e-05, -1.103806054530976e-05, -6.67762283867307e-06, -3.560142738341654e-06, -1.558268780899041e-06, -4.773862017082298e-07, -6.149835531099601e-08, -6.938893903907228e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 2 + }, + { + "i": 0, + "radial_function": [ + 1.245824758443329e-10, 1.29343019131429e-10, 1.342854721611278e-10, 1.394167860600117e-10, 1.447441775705784e-10, 1.502751392009208e-10, 1.560174497622465e-10, 1.619791853090551e-10, 1.681687304973557e-10, 1.745947903769074e-10, 1.812664026340562e-10, 1.881929503023963e-10, 1.953841749591266e-10, 2.028501904256628e-10, 2.106014969917764e-10, 2.186489961832568e-10, 2.270040060938772e-10, 2.356782773032188e-10, 2.446840094027413e-10, 2.54033868153347e-10, 2.637410032985578e-10, 2.73819067058369e-10, 2.842822333297835e-10, 2.951452176210292e-10, 3.064232977474986e-10, 3.181323353185093e-10, 3.302887980451158e-10, 3.429097829003344e-10, 3.560130401643603e-10, 3.696169983885928e-10, 
3.837407903135707e-10, 3.984042797772806e-10, 4.136280896516694e-10, 4.294336308466566e-10, 4.458431324224397e-10, 4.628796728524289e-10, 4.805672124807944e-10, 4.989306272202646e-10, 5.179957435375677e-10, 5.377893747757238e-10, 5.583393588642579e-10, 5.796745974703857e-10, 6.018250966462186e-10, 6.248220090291596e-10, 6.486976776548435e-10, 6.734856814442163e-10, 6.99220882428758e-10, 7.259394747802389e-10, 7.536790357139742e-10, 7.82478578337164e-10, 8.123786065166293e-10, 8.434211718431354e-10, 8.756499327723844e-10, 9.091102160258651e-10, 9.438490803379254e-10, 9.799153826386679e-10, 1.017359846765811e-09, 1.056235134802098e-09, 1.096595921138598e-09, 1.138498969368068e-09, 1.182003212116474e-09, 1.227169833924993e-09, 1.274062357299022e-09, 1.322746732045209e-09, 1.373291428022171e-09, 1.425767531435286e-09, 1.480248844811037e-09, 1.536811990791469e-09, 1.595536519894723e-09, 1.656505022393247e-09, 1.719803244466919e-09, 1.785520208794541e-09, 1.853748339753218e-09, 1.924583593401713e-09, 1.998125592430604e-09, 2.074477766268944e-09, 2.153747496544554e-09, 2.236046268102444e-09, 2.321489825793758e-09, 2.410198337255746e-09, 2.502296561911625e-09, 2.597914026428087e-09, 2.6971852068771e-09, 2.800249717858223e-09, 2.907252508847406e-09, 3.018344068048317e-09, 3.133680634032979e-09, 3.253424415469235e-09, 3.377743819244105e-09, 3.506813687303819e-09, 3.640815542543515e-09, 3.779937844092507e-09, 3.924376252354018e-09, 4.074333904172119e-09, 4.230021698512861e-09, 4.391658593061208e-09, 4.55947191215104e-09, 4.733697666461078e-09, 4.914580884926379e-09, 5.102375959332162e-09, 5.297347002074365e-09, 5.499768217590278e-09, 5.709924287981344e-09, 5.928110773370445e-09, 6.154634527556791e-09, 6.389814129552521e-09, 6.633980331608352e-09, 6.887476524357796e-09, 7.150659219734318e-09, 7.423898552340416e-09, 7.707578799973424e-09, 8.002098924040308e-09, 8.307873130621118e-09, 8.625331452970035e-09, 8.954920356273257e-09, 9.297103365513722e-09, 9.652361717325922e-09, 
1.002119503675714e-08, 1.040412203988666e-08, 1.080168126329116e-08, 1.121443182138144e-08, 1.164295419267607e-08, 1.208785103611677e-08, 1.254974803857387e-08, 1.302929479473317e-08, 1.35271657206012e-08, 1.404406100191382e-08, 1.458070757878104e-08, 1.513786016795275e-08, 1.57163023241428e-08, 1.6316847541903e-08, 1.694034039959714e-08, 1.758765774708251e-08, 1.825970993876933e-08, 1.895744211379162e-08, 1.968183552508869e-08, 2.043390891926663e-08, 2.121471996917943e-08, 2.202536676124336e-08, 2.286698933957641e-08, 2.374077130913243e-08, 2.464794150008495e-08, 2.558977569579964e-08, 2.656759842682485e-08, 2.758278483342253e-08, 2.863676259925684e-08, 2.973101395896003e-08, 3.086707778239681e-08, 3.204655173855708e-08, 3.327109454211978e-08, 3.454242828584404e-08, 3.586234086206816e-08, 3.723268847671876e-08, 3.865539825936443e-08, 4.013247097298296e-08, 4.166598382724947e-08, 4.32580933993016e-08, 4.491103866608605e-08, 4.662714415254798e-08, 4.840882320008958e-08, 5.025858135988929e-08, 5.21790199158534e-08, 5.417283954214915e-08, 5.624284410046051e-08, 5.839194458230421e-08, 6.062316320194258e-08, 6.293963764565171e-08, 6.534462548330855e-08, 6.784150874850247e-08, 7.043379869360554e-08, 7.312514072648021e-08, 7.591931953576848e-08, 7.882026441195596e-08, 8.183205477169372e-08, 8.495892589313847e-08, 8.820527487036494e-08, 9.157566679522711e-08, 9.507484117533966e-08, 9.870771859720848e-08, 1.024794076438661e-07, 1.063952120767272e-07, 1.104606382917627e-07, 1.146814030604558e-07, 1.190634415664222e-07, 1.236129157489811e-07, 1.283362229653912e-07, 1.332400049839327e-07, 1.383311573204487e-07, 1.436168389314738e-07, 1.491044822775585e-07, 1.548018037709161e-07, 1.607168146220807e-07, 1.668578321007885e-07, 1.732334912269107e-07, 1.798527569078511e-07, 1.867249365394407e-07, 1.938596930880436e-07, 2.012670586722174e-07, 2.089574486630143e-07, 2.169416763227151e-07, 2.252309680025329e-07, 2.338369789206487e-07, 2.42771809542694e-07, 2.520480225876994e-07, 
2.616786606833672e-07, 2.716772646954397e-07, 2.820578927569111e-07, 2.928351400237582e-07, 3.040241591849345e-07, 3.156406817554043e-07, 3.277010401820661e-07, 3.402221907936685e-07, 3.532217376267435e-07, 3.667179571612139e-07, 3.807298240001215e-07, 3.95277037529775e-07, 4.103800495974164e-07, 4.260600932454313e-07, 4.42339212542415e-07, 4.592402935526457e-07, 4.767870964878339e-07, 4.950042890857774e-07, 5.139174812630904e-07, 5.335532610902778e-07, 5.539392321398548e-07, 5.75104052259631e-07, 5.970774738256716e-07, 6.198903855311994e-07, 6.435748557701457e-07, 6.681641776760447e-07, 6.936929158792196e-07, 7.201969550483121e-07, 7.477135502833027e-07, 7.76281379431342e-07, 8.059405973983213e-07, 8.367328925322318e-07, 8.687015451573881e-07, 9.018914883412711e-07, 9.363493709792574e-07, 9.721236232854262e-07, 1.009264524780994e-06, 1.047824274875761e-06, 1.087857066141144e-06, 1.129419160377133e-06, 1.172568967580327e-06, 1.217367127921702e-06, 1.263876596852035e-06, 1.312162733449332e-06, 1.362293392136482e-06, 1.414339017894353e-06, 1.468372745103332e-06, 1.524470500154274e-06, 1.582711107968543e-06, 1.643176402578028e-06, 1.705951341919575e-06, 1.771124127002975e-06, 1.838786325620875e-06, 1.909033000769948e-06, 1.981962843966613e-06, 2.057678313639477e-06, 2.136285778795685e-06, 2.217895668155903e-06, 2.302622624971864e-06, 2.390585667739191e-06, 2.481908357028242e-06, 2.576718968668605e-06, 2.675150673527106e-06, 2.777341724127975e-06, 2.883435648378086e-06, 2.993581450665293e-06, 3.107933820609125e-06, 3.226653349754997e-06, 3.349906756514814e-06, 3.477867119663593e-06, 3.610714120719264e-06, 3.748634295540593e-06, 3.891821295495679e-06, 4.040476158559751e-06, 4.194807590721541e-06, 4.355032258088853e-06, 4.521375090095315e-06, 4.694069594233513e-06, 4.873358182747769e-06, 5.059492511740418e-06, 5.252733833159483e-06, 5.453353360159719e-06, 5.661632646337411e-06, 5.877863979367215e-06, 6.102350789586408e-06, 6.335408074082884e-06, 6.577362836890968e-06, 
6.82855454587948e-06, 7.089335606983979e-06, 7.360071856426492e-06, 7.641143071601289e-06, 7.932943501340196e-06, 8.235882416273276e-06, 8.550384680059848e-06, 8.876891342263444e-06, 9.215860253691668e-06, 9.567766705045538e-06, 9.933104089759444e-06, 1.031238459193393e-05, 1.070613990031771e-05, 1.11149219493047e-05, 1.153930368797313e-05, 1.197987987821511e-05, 1.243726792304754e-05, 1.291210872624034e-05, 1.340506758444222e-05, 1.391683511300735e-05, 1.444812820680249e-05, 1.499969103730051e-05, 1.557229608731538e-05, 1.616674522478519e-05, 1.678387081707874e-05, 1.742453688732416e-05, 1.808964031434119e-05, 1.878011207779141e-05, 1.949691855025975e-05, 2.024106283798691e-05, 2.101358617209285e-05, 2.181556935215409e-05, 2.264813424409298e-05, 2.351244533439767e-05, 2.440971134276798e-05, 2.534118689535615e-05, 2.63081742608469e-05, 2.731202515172719e-05, 2.835414259312932e-05, 2.943598286178556e-05, 3.055905749766445e-05, 3.17249353909808e-05, 3.293524494737655e-05, 3.419167633413946e-05, 3.549598381045838e-05, 3.684998814480866e-05, 3.825557912268264e-05, 3.971471814796373e-05, 4.12294409414091e-05, 4.280186033979481e-05, 4.443416919940311e-05, 4.612864340768347e-05, 4.788764500705068e-05, 4.971362543487465e-05, 5.160912888396672e-05, 5.357679578788377e-05, 5.561936643564488e-05, 5.773968472052955e-05, 5.994070202783686e-05, 6.222548126664509e-05, 6.45972010507677e-05, 6.705916003430565e-05, 6.961478140738116e-05, 7.226761755779175e-05, 7.502135490459889e-05, 7.787981890976864e-05, 8.084697927429046e-05, 8.392695532534788e-05, 8.712402160136534e-05, 9.044261364199479e-05, 9.388733399032025e-05, 9.746295841481684e-05, 0.0001011744423588424, 0.0001050269276257334, 0.0001090257493077563, 0.0001131764429675707, 0.0001174847520809948, 0.0001219566357502885, 0.0001265982766973591, 0.0001314160895466644, 0.000136416729407878, 0.0001416071007687038, 0.0001469943667085542, 0.0001525859584441613, 0.0001583895852185017, 0.0001644132445448111, 0.0001706652328178074, 
0.000177154156304583, 0.000183888942528107, 0.0001908788520565185, 0.0001981334907119188, 0.0002056628222127111, 0.000213477181263975, 0.0002215872871107511, 0.0002300042575695899, 0.0002387396235541126, 0.0002478053441108095, 0.0002572138219817229, 0.0002669779197111468, 0.0002771109763139327, 0.0002876268245234589, 0.0002985398086377837, 0.0003098648029830401, 0.0003216172310135329, 0.0003338130850685786, 0.0003464689468065372, 0.0003596020083370624, 0.0003732300940730475, 0.0003873716833242068, 0.0004020459336548566, 0.0004172727050288047, 0.0004330725847648374, 0.000449466913326775, 0.0004664778109725273, 0.000484128205287053, 0.0005024418596246258, 0.0005214434024862017, 0.0005411583578581857, 0.0005616131765392703, 0.0005828352684824641, 0.0006048530361797574, 0.0006276959091172561, 0.0006513943793289982, 0.0006759800380778404, 0.0007014856136922139, 0.0007279450105876386, 0.0007553933495021715, 0.0007838670089750275, 0.0008134036680977688, 0.0008440423505673676, 0.0008758234700705529, 0.0009087888770286221, 0.0009429819067317719, 0.0009784474288917645, 0.001015231898641291, 0.001053383409008073, 0.001092951744891074, 0.001133988438565564, 0.001176546826743029, 0.001220682109210882, 0.001266451409075983, 0.001313913834634608, 0.001363130542890275, 0.001414164804739106, 0.001467082071840718, 0.001521950045190687, 0.001578838745408291, 0.001637820584751013, 0.001698970440864411, 0.001762365732273104, 0.001828086495615341, 0.00189621546462004, 0.001966838150821334, 0.002040042926001409, 0.002115921106347781, 0.002194567038306154, 0.002276078186104531, 0.00236055522091848, 0.002448102111640962, 0.002538826217213254, 0.002632838380466277, 0.002730253023413425, 0.002831188243927556, 0.002935765913725658, 0.003044111777574683, 0.003156355553621567, 0.003272631034739059, 0.003393076190767057, 0.003517833271515937, 0.003647048910384838, 0.003780874228433065, 0.00391946493872697, 0.004062981450768397, 0.004211588974792641, 0.004365457625705393, 0.004524762526408152, 
0.00468968391024021, 0.00486040722224344, 0.005037123218931967, 0.005220028066224094, 0.005409323435167397, 0.005605216595059925, 0.005807920503541416, 0.006017653893197208, 0.006234641354185336, 0.006459113412363065, 0.006691306602353239, 0.006931463534953639, 0.007179832958252933, 0.007436669811775896, 0.007702235272937724, 0.007976796795042441, 0.008260628136013897, 0.008554009376999398, 0.008857226929935454, 0.009170573533113404, 0.009494348233727811, 0.009828856356335644, 0.01017440945609571, 0.01053132525559891, 0.01089992756403881, 0.01128054617740906, 0.01167351675835084, 0.01207918069420747, 0.01249788493177733, 0.01292998178718864, 0.01337582872925111, 0.0138357881345715, 0.01431022701265067, 0.01479951669911071, 0.01530403251513333, 0.01582415339112164, 0.01636026145253311, 0.01691274156576561, 0.01748198084191697, 0.01806836809618001, 0.01867229326057828, 0.01929414674769846, 0.01993431876302818, 0.02059319856346899, 0.02127117365956185, 0.02196862895893673, 0.02268594584848474, 0.02342350121274401, 0.02418166638599896, 0.02496080603561264, 0.0257612769741449, 0.02658342689786284, 0.02742759304931646, 0.02829410080174294, 0.02918326216317236, 0.03009537419824257, 0.03103071736589133, 0.03198955377128109, 0.03297212533053168, 0.03397865184708691, 0.03500932899882783, 0.03606432623537154, 0.03714378458535808, 0.03824781437393756, 0.03937649285112445, 0.04052986173218761, 0.04170792465180148, 0.04291064453429017, 0.0441379408829628, 0.04538968699226181, 0.04666570708723096, 0.04796577339566056, 0.049289603159179, 0.05063685559054271, 0.05200712878542467, 0.05339995659811836, 0.05481480549176281, 0.05625107137494637, 0.05770807643787017, 0.0591850660026399, 0.06068120540370151, 0.06219557691594806, 0.06372717674958293, 0.06527491213243412, 0.06683759850206329, 0.06841395683168695, 0.07000261111562932, 0.07160208604172345, 0.07321080487977626, 0.0748270876168789, 0.07644914937196938, 0.07807509912361381, 0.07970293878644052, 0.08133056267301324, 
0.08295575737913886, 0.08457620213163226, 0.08618946963838327, 0.08779302748113685, 0.08938424009168128, 0.09096037135209098, 0.09251858785924254, 0.09405596289298407, 0.09556948112601618, 0.09705604411171508, 0.09851247658372377, 0.09993553359811695, 0.1013219085452597, 0.1026682420540749, 0.1039711318062727, 0.1052271432721355, 0.1064328213726483, 0.1075847030651111, 0.1086793308408171, 0.109713267113947, 0.1106831094705014, 0.1115855067348886, 0.1124171757997528, 0.1131749191517969, 0.1138556430128206, 0.1144563760010489, 0.1149742882031819, 0.1154067105326279, 0.1157511542342443, 0.1160053303808375, 0.1161671691918957, 0.1162348389908277, 0.11620676460367, 0.11608164499013, 0.1158584698873371, 0.1155365352381568, 0.1151154571697938, 0.1145951842850973, 0.1139760080288979, 0.1132585708952682, 0.1124438722491926, 0.1115332715481086, 0.1105284887654242, 0.1094316018396335, 0.1082450409991494, 0.1069715798444069, 0.1056143231049762, 0.1041766910299732, 0.102662400414348, 0.1010754423108066, 0.09942005652602258, 0.09770070304894513, 0.09592203060660807, 0.09408884258672934, 0.09220606060403108, 0.09027868601574446, 0.08831175970797543, 0.08631032047500348, 0.0842793622944642, 0.08222379075886613, 0.08014837885420273, 0.07805772217586694, 0.07595619353738113, 0.0738478967559801, 0.07173661918912513, 0.06962578234716774, 0.06751838962090413, 0.06541696984207343, 0.06332351504598983, 0.06123941043760234, 0.05916535418814307, 0.05710210602773161, 0.05505318458791061, 0.05302275433546466, 0.05101481847811987, 0.04903318806203114, 0.04708145155744062, 0.04516294537567683, 0.04328072580410885, 0.04143754288804691, 0.03963581682899409, 0.03787761750606228, 0.03616464776060413, 0.03449823111190647, 0.03287930459269531, 0.0313084174056604, 0.02978573610453644, 0.0283110569936886, 0.02688382641672085, 0.025503169565433, 0.02416792838343343, 0.02287670906188388, 0.02162793952620432, 0.02041993719018395, 0.01925098710610068, 0.01811943046458159, 0.01702376319479734, 
0.01596274418336288, 0.01493551236857349, 0.01394171167566566, 0.01298162243951828, 0.01205629761541421, 0.0111673643408377, 0.01031572654982663, 0.009501908700642242, 0.008726291842249989, 0.007989112242724389, 0.00729046086160981, 0.006630283697511313, 0.006008383036330001, 0.005424419619065618, 0.004877915741001687, 0.004368259286369081, 0.003894708694310078, 0.003456398843151243, 0.003052347830717839, 0.002681464618731644, 0.002342557499313225, 0.002034343331359101, 0.001755457484182876, 0.001504464415443343, 0.001279868800154127, 0.001080127117686069, 0.0009036595942712861, 0.0007488623898438823, 0.0006141199102910749, 0.0004978171195670622, 0.0003983517208787229, 0.0003141460724706835, 0.0002436587016513281, 0.0001853952807672476, 0.0001379189309908824, 9.985972417257682e-05, 6.992325964193247e-05, 4.689820178226112e-05, 2.966267538942469e-05, 1.718942917722927e-05, 8.549693212379916e-06, 2.91567333184084e-06, -4.383554453590843e-07, -2.136374125716323e-06, -2.702359675706145e-06, -2.563413420943877e-06, -2.054228807030212e-06, -1.422826406357464e-06, -8.374612907036116e-07, -3.945857655063278e-07, -1.277293695603476e-07, -1.713845834867833e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 3 + }, + { + "i": 1, + "radial_function": [ + -3.54441717872366e-08, -3.634144581557059e-08, -3.726143447215397e-08, -3.82047127819781e-08, -3.917187032693528e-08, -4.016351161433193e-08, -4.118025645473136e-08, -4.222274034936181e-08, -4.329161488733172e-08, -4.438754815290143e-08, -4.551122514306417e-08, -4.66633481956995e-08, -4.784463742856513e-08, -4.905583118940222e-08, -5.029768651743606e-08, -5.157097961655877e-08, -5.287650634049207e-08, -5.421508269023208e-08, -5.558754532408693e-08, -5.699475208062722e-08, -5.843758251487441e-08, -5.9916938448064e-08, -6.143374453132637e-08, -6.29889488236375e-08, -6.458352338440147e-08, -6.621846488103398e-08, -6.78947952119284e-08, -6.9613562145192e-08, -7.137583997355321e-08, -7.318273018584802e-08, -7.503536215550575e-08, -7.693489384646488e-08, -7.888251253696011e-08, -8.087943556163203e-08, -8.292691107242559e-08, -8.502621881875029e-08, -8.717867094739153e-08, -8.938561282267325e-08, -9.164842386738322e-08, -9.396851842498859e-08, -9.634734664367793e-08, -9.878639538278548e-08, -1.012871891421622e-07, -1.038512910150752e-07, -1.064803036652319e-07, -1.091758703285375e-07, -1.119396758402151e-07, -1.147734476879282e-07, -1.176789570915645e-07, -1.206580201103578e-07, 
-1.237124987780354e-07, -1.268443022667076e-07, -1.300553880802199e-07, -1.333477632777188e-07, -1.36723485728195e-07, -1.401846653967851e-07, -1.437334656636416e-07, -1.47372104676191e-07, -1.511028567356271e-07, -1.549280537185078e-07, -1.588500865343405e-07, -1.62871406620071e-07, -1.669945274724081e-07, -1.712220262189426e-07, -1.755565452290446e-07, -1.800007937655405e-07, -1.845575496782106e-07, -1.892296611401597e-07, -1.940200484281481e-07, -1.98931705747999e-07, -2.03967703106217e-07, -2.091311882289952e-07, -2.14425388529807e-07, -2.198536131268106e-07, -2.254192549113349e-07, -2.311257926687303e-07, -2.369767932529189e-07, -2.429759138159995e-07, -2.491269040943025e-07, -2.554336087523245e-07, -2.61899969786005e-07, -2.68530028986856e-07, -2.753279304684732e-07, -2.822979232570221e-07, -2.894443639473072e-07, -2.967717194260914e-07, -3.042845696643684e-07, -3.119876105803308e-07, -3.198856569748286e-07, -3.279836455411506e-07, -3.362866379510097e-07, -3.447998240186671e-07, -3.535285249451688e-07, -3.624781966447265e-07, -3.716544331553226e-07, -3.810629701356688e-07, -3.907096884507103e-07, -4.006006178479177e-07, -4.107419407266583e-07, -4.211399960030178e-07, -4.318012830724728e-07, -4.427324658729086e-07, -4.539403770505118e-07, -4.654320222311497e-07, -4.772145843999094e-07, -4.892954283915228e-07, -5.016821054945084e-07, -5.143823581718885e-07, -5.274041249014486e-07, -5.407555451385634e-07, -5.544449644046842e-07, -5.68480939504688e-07, -5.828722438763391e-07, -5.976278730752144e-07, -6.127570503985253e-07, -6.28269232651347e-07, -6.44174116058872e-07, -6.604816423283855e-07, -6.772020048647468e-07, -6.943456551432777e-07, -7.119233092440316e-07, -7.299459545515482e-07, -7.484248566242693e-07, -7.673715662379289e-07, -7.86797926607317e-07, -8.067160807909289e-07, -8.271384792831554e-07, -8.480778877987448e-07, -8.69547395254416e-07, -8.915604219526251e-07, -9.141307279725884e-07, -9.37272421773835e-07, -9.609999690176632e-07, 
-9.853282016120216e-07, -1.010272326985486e-06, -1.035847937596119e-06, -1.062071020681185e-06, -1.088957968253819e-06, -1.11652558735289e-06, -1.144791110552519e-06, -1.173772206737766e-06, -1.203486992153302e-06, -1.23395404173194e-06, -1.265192400710134e-06, -1.297221596537724e-06, -1.330061651089362e-06, -1.363733093185309e-06, -1.398256971429407e-06, -1.433654867372272e-06, -1.469948909007993e-06, -1.50716178461272e-06, -1.545316756933887e-06, -1.584437677738897e-06, -1.62454900273243e-06, -1.665675806851696e-06, -1.707843799949209e-06, -1.751079342872942e-06, -1.795409463953913e-06, -1.840861875911543e-06, -1.88746499318739e-06, -1.935247949718099e-06, -1.98424061715875e-06, -2.034473623567982e-06, -2.085978372566651e-06, -2.138787062982012e-06, -2.192932708989708e-06, -2.248449160766314e-06, -2.305371125665216e-06, -2.363734189929264e-06, -2.423574840953751e-06, -2.484930490113619e-06, -2.547839496169402e-06, -2.612341189266347e-06, -2.678475895541974e-06, -2.746284962357443e-06, -2.815810784168471e-06, -2.887096829052288e-06, -2.960187665906954e-06, -3.035128992340398e-06, -3.111967663266561e-06, -3.190751720226537e-06, -3.271530421453343e-06, -3.354354272698939e-06, -3.439275058843053e-06, -3.526345876303627e-06, -3.615621166269129e-06, -3.707156748773878e-06, -3.801009857637481e-06, -3.897239176290586e-06, -3.995904874509405e-06, -4.097068646081993e-06, -4.200793747430228e-06, -4.307145037211467e-06, -4.416189016925043e-06, -4.527993872549095e-06, -4.642629517233733e-06, -4.76016763507782e-06, -4.880681726016451e-06, -5.00424715184777e-06, -5.130941183427965e-06, -5.260843049064011e-06, -5.394033984135071e-06, -5.530597281973288e-06, -5.670618346036473e-06, -5.814184743405391e-06, -5.961386259639255e-06, -6.112314955024394e-06, -6.267065222251116e-06, -6.425733845555495e-06, -6.588420061363337e-06, -6.755225620474359e-06, -6.926254851826349e-06, -7.101614727878969e-06, -7.281414931658989e-06, -7.465767925509138e-06, -7.654789021583833e-06, 
-7.848596454136857e-06, -8.047311453646078e-06, -8.25105832282258e-06, -8.459964514552175e-06, -8.67416071181832e-06, -8.893780909657742e-06, -9.11896249919981e-06, -9.349846353843639e-06, -9.586576917627185e-06, -9.829302295844214e-06, -1.007817434796722e-05, -1.03333487829345e-05, -1.059498525686232e-05, -1.086324747324431e-05, -1.113830328570114e-05, -1.142032480334669e-05, -1.170948849883666e-05, -1.200597531916922e-05, -1.230997079930776e-05, -1.262166517869788e-05, -1.294125352075358e-05, -1.32689358353878e-05, -1.36049172046661e-05, -1.394940791166356e-05, -1.430262357260648e-05, -1.466478527238448e-05, -1.503611970351796e-05, -1.541685930867117e-05, -1.580724242680121e-05, -1.620751344303646e-05, -1.661792294238101e-05, -1.703872786734262e-05, -1.747019167958603e-05, -1.791258452571498e-05, -1.836618340728901e-05, -1.883127235518492e-05, -1.93081426084141e-05, -1.979709279751145e-05, -2.02984291326133e-05, -2.08124655963458e-05, -2.133952414164787e-05, -2.187993489465632e-05, -2.243403636278402e-05, -2.300217564812575e-05, -2.358470866632905e-05, -2.418200037107242e-05, -2.479442498429555e-05, -2.542236623233104e-05, -2.606621758809084e-05, -2.672638251946416e-05, -2.740327474408894e-05, -2.809731849066156e-05, -2.880894876695597e-05, -2.953861163472604e-05, -3.028676449167062e-05, -3.10538763606457e-05, -3.184042818631211e-05, -3.264691313941338e-05, -3.347383692888267e-05, -3.432171812198351e-05, -3.519108847269459e-05, -3.608249325855414e-05, -3.699649162618572e-05, -3.79336569457333e-05, -3.889457717443851e-05, -3.98798552296014e-05, -4.089010937117005e-05, -4.192597359421351e-05, -4.298809803153755e-05, -4.407714936671044e-05, -4.519381125777465e-05, -4.633878477192425e-05, -4.751278883144044e-05, -4.871656067118149e-05, -4.995085630793292e-05, -5.121645102193382e-05, -5.251413985090063e-05, -5.384473809688159e-05, -5.520908184628313e-05, -5.660802850341751e-05, -5.804245733793458e-05, -5.951327004650503e-05, -6.102139132913928e-05, 
-6.256776948053067e-05, -6.415337699682694e-05, -6.577921119824357e-05, -6.744629486794329e-05, -6.915567690762075e-05, -7.090843301024087e-05, -7.27056663503933e-05, -7.45485082927393e-05, -7.643811911903937e-05, -7.837568877426497e-05, -8.036243763231204e-05, -8.239961728184698e-05, -8.448851133283533e-05, -8.663043624431288e-05, -8.882674217398128e-05, -9.107881385022405e-05, -9.338807146715384e-05, -9.575597160332647e-05, -9.818400816476679e-05, -0.0001006737133529787, -0.0001032266586586253, -0.0001058444558815875, -0.000108528758178132, -0.0001112812611359367, -0.0001141037038777479, -0.0001169978701944657, -0.0001199655897084752, -0.0001230087390680671, -0.0001261292431738195, -0.0001293290764378324, -0.0001326102640767387, -0.0001359748834394418, -0.0001394250653705547, -0.0001429629956105539, -0.0001465909162336817, -0.0001503111271246695, -0.0001541259874953852, -0.0001580379174425384, -0.0001620493995476189, -0.0001661629805202728, -0.000170381272886362, -0.0001747069567219926, -0.0001791427814348291, -0.0001836915675940672, -0.0001883562088104648, -0.000193139673667887, -0.0001980450077078597, -0.0002030753354686724, -0.0002082338625806299, -0.000213523877919088, -0.0002189487558169719, -0.0002245119583385273, -0.000230217037616102, -0.0002360676382518293, -0.0002420674997861279, -0.0002482204592350051, -0.0002545304536982128, -0.0002610015230403611, -0.0002676378126471844, -0.0002744435762592042, -0.0002814231788851152, -0.000288581099797303, -0.000295921935611964, -0.0003034504034563995, -0.000311171344226123, -0.0003190897259345164, -0.0003272106471578587, -0.0003355393405786384, -0.0003440811766301686, -0.0003528416672456145, -0.0003618264697146463, -0.0003710413906510464, -0.0003804923900746926, -0.0003901855856114805, -0.0004001272568148367, -0.0004103238496126178, -0.0004207819808833148, -0.0004315084431655937, -0.0004425102095053712, -0.00045379443844474, -0.0004653684791572053, -0.0004772398767338622, -0.0004894163776252691, 
-0.000501905935243964, -0.0005147167157327141, -0.000527857103903765, -0.0005413357093545397, -0.0005551613727654165, -0.0005693431723853845, -0.0005838904307116202, -0.0005988127213691607, -0.0006141198761971188, -0.0006298219925480615, -0.0006459294408073974, -0.0006624528721398718, -0.0006794032264704682, -0.000696791740707275, -0.000714629957214131, -0.0007329297325410873, -0.0007517032464210329, -0.0007709630110410606, -0.0007907218805974485, -0.0008109930611434168, -0.0008317901207390954, -0.0008531269999134724, -0.0008750180224483593, -0.0008974779064947649, -0.0009205217760323599, -0.0009441651726830561, -0.000968424067890083, -0.0009933148754742438, -0.001018854464579424, -0.001045060173019772, -0.001071949821041304, -0.001099541725511121, -0.001127854714547717, -0.00115690814260632, -0.001186721906033515, -0.001217316459105829, -0.001248712830567345, -0.001280932640681785, -0.00131399811881489, -0.001347932121563348, -0.00138275815144685, -0.001418500376180303, -0.00145518364854357, -0.001492833526866474, -0.001531476296147199, -0.001571138989822524, -0.00161184941220873, -0.00165363616163228, -0.001696528654269701, -0.001740557148716407, -0.001785752771304383, -0.001832147542188955, -0.001879774402225035, -0.001928667240653332, -0.001978860923617243, -0.002030391323531046, -0.002083295349320205, -0.002137610977554356, -0.002193377284493574, -0.002250634479068223, -0.002309423936812438, -0.00236978823477095, -0.002431771187398422, -0.002495417883469909, -0.002560774724020352, -0.002627889461330112, -0.002696811238972633, -0.002767590632939073, -0.002840279693853443, -0.002914931990290305, -0.002991602653205161, -0.00307034842148595, -0.0031512276886316, -0.003234300550561175, -0.003319628854554451, -0.003407276249321367, -0.003497308236194718, -0.003589792221436249, -0.003684797569642276, -0.003782395658230275, -0.003882659932982528, -0.003985665964617585, -0.004091491506353762, -0.004200216552422268, -0.004311923397480176, -0.004426696696865089, 
-0.004544623527624869, -0.004665793450245872, -0.004790298570992826, -0.004918233604762153, -0.005049695938338056, -0.005184785693927699, -0.00532360579283698, -0.005466262019133196, -0.005612863083123813, -0.005763520684462536, -0.005918349574674441, -0.006077467618870704, -0.006240995856401099, -0.006409058560168036, -0.006581783294299928, -0.006759300969853958, -0.006941745898188195, -0.007129255841611322, -0.007321972060884183, -0.007520039359110548, -0.007723606121516425, -0.007932824350575216, -0.008147849695892528, -0.008368841478217674, -0.008595962706898969, -0.008829380090048064, -0.009069264036622303, -0.009315788649575454, -0.009569131709165263, -0.009829474645440147, -0.01009700249885904, -0.01037190386792522, -0.01065437084263892, -0.01094459892249371, -0.0112427869176577, -0.01154913683189362, -0.01186385372568032, -0.01218714555790392, -0.01251922300438796, -0.01286029925143026, -0.01321058976240911, -0.01357031201541257, -0.01393968520973361, -0.01431892993895989, -0.01470826782827034, -0.01510792113343352, -0.0155181122988826, -0.01593906347212233, -0.01637099597160384, -0.01681412970508294, -0.0172686825353622, -0.01773486959020138, -0.01821290251307155, -0.01870298865132362, -0.01920533017824401, -0.01972012314538286, -0.02024755646146103, -0.02078781079409812, -0.0213410573905543, -0.02190745681364633, -0.02248715758898904, -0.02308029475972579, -0.02368698834495291, -0.02430734169811604, -0.02494143976176358, -0.02558934721519137, -0.02625110651170501, -0.02692673580246985, -0.02761622674421725, -0.02831954218843584, -0.02903661375010649, -0.02976733925454147, -0.03051158006147556, -0.03126915826623081, -0.03203985377855015, -0.03282340128057364, -0.03361948706642364, -0.03442774576698126, -0.03524775696468499, -0.03607904170457061, -0.0369210589093126, -0.03777320170772521, -0.03863479368804887, -0.03950508508939252, -0.04038324894693, -0.04126837720887202, -0.04215947684585223, -0.04305546597619161, -0.04395517003353441, 
-0.04485731800658872, -0.04576053878415449, -0.04666335764227399, -0.04756419291419493, -0.04846135288788006, -0.04935303298001533, -0.0502373132398491, -0.05111215624070342, -0.05197540542161377, -0.0528247839462365, -0.05365789415086324, -0.05447221765805847, -0.05526511623701112, -0.05603383349610694, -0.05677549749739064, -0.05748712438640331, -0.05816562313424552, -0.05880780149150013, -0.05941037325572156, -0.05996996695540235, -0.06048313605349684, -0.06094637077253265, -0.06135611164086831, -0.06170876485554783, -0.06200071955122333, -0.06222836705651739, -0.06238812220872002, -0.06247644678457952, -0.06248987508887868, -0.06242504172319145, -0.06227871153440397, -0.06204781171597024, -0.06172946600417763, -0.0613210308766629, -0.06082013362080736, -0.06022471209525716, -0.05953305595850306, -0.05874384908413086, -0.05785621282300021, -0.05686974970830655, -0.05578458713042021, -0.05460142043489697, -0.05332155481959582, -0.05194694532607072, -0.05048023413717924, -0.0489247843082553, -0.04728470897454883, -0.04556489499456694, -0.04377101990935738, -0.0419095610238997, -0.03998779535119554, -0.03801378910530589, -0.03599637538979313, -0.03394511870645192, -0.0318702649098996, -0.02978267526092511, -0.02769374329013345, -0.02561529327832714, -0.02355945929632888, -0.02153854392978471, -0.01956485604905259, -0.01765052727553967, -0.01580730714842582, -0.01404633741358825, -0.01237790634292236, -0.01081118454916254, -0.009353944389343087, -0.008012265747975564, -0.006790231755417023, -0.005689618821701886, -0.004709586242181655, -0.003846371546024069, -0.003092998695387568, -0.002439007180977761, -0.001870210973059848, -0.00137165957741271, -0.0009381940033778975, -0.0005670447924757401, -0.0002550024503467287, 1.541519181281548e-06, 0.000206547071679096, 0.0003642720158964969, 0.0004792129214006736, 0.000556041249967543, 0.0005995355498593022, 0.0006145106765429187, 0.0006057451216107282, 0.0005779076428564091, 0.0005354844925722115, 0.0004827086389746784, 
0.0004234924689950244, 0.0003613655521836562, 0.000299419138779909, 0.000240259164323884, 0.0001859696431228495, 0.0001380884577948018, 9.759769541724424e-05, 6.493084410874941e-05, 3.999934572342621e-05, 2.224119500932216e-05, 1.069447154251543e-05, 4.098869059887544e-06, 1.028419724598967e-06, 5.866024486295141e-08, -2.859672560556747e-08, -2.081668171172169e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 1 + }, + { + "i": 1, + "radial_function": [ + 6.832930862242145e-12, 7.094030563769529e-12, 7.365107391325697e-12, 7.646542589853654e-12, 7.93873197236469e-12, 8.242086476609253e-12, 8.557032743019187e-12, 8.884013714734025e-12, 9.223489260555057e-12, 9.575936821703663e-12, 9.941852083292898e-12, 1.032174967145727e-11, 1.071616387712073e-11, 1.11256494074209e-11, 1.155078216584644e-11, 1.19921600621842e-11, 1.245040385341599e-11, 1.292615801674694e-11, 1.342009165599352e-11, 1.393289944260603e-11, 1.446530259264844e-11, 1.501804988111029e-11, 1.559191869497659e-11, 1.618771612653694e-11, 1.680628010847137e-11, 1.744848059230907e-11, 1.811522077191774e-11, 1.880743835374372e-11, 1.952610687558963e-11, 2.027223707578417e-11, 2.104687831466899e-11, 2.185112005040274e-11, 2.26860933711569e-11, 2.355297258585851e-11, 2.445297687571705e-11, 2.538737200885757e-11, 2.635747212047179e-11, 2.736464156099071e-11, 2.841029681487743e-11, 2.949590849273912e-11, 3.062300339955891e-11, 3.179316668195718e-11, 3.30080440575014e-11, 3.42693441291996e-11, 3.557884078843299e-11, 3.693837570970534e-11, 3.834986094071982e-11, 3.981528159142372e-11, 4.133669862580354e-11, 4.291625176035663e-11, 4.455616247331439e-11, 4.62587371288505e-11, 4.802637022066612e-11, 4.986154773951421e-11, 5.176685066939915e-11, 5.374495861736611e-11, 5.579865358198739e-11, 5.793082386584331e-11, 6.014446813749985e-11, 6.244269964869652e-11, 6.482875061267209e-11, 6.730597674978819e-11, 6.987786200684134e-11, 7.254802345669968e-11, 7.532021638515614e-11, 7.819833957214799e-11, 8.118644077477321e-11, 8.42887224198113e-11, 8.750954751375377e-11, 9.085344577865709e-11, 9.432512002244186e-11, 9.792945275260143e-11, 1.016715130426159e-10, 1.055565636607287e-10, 1.095900684711113e-10, 1.137777001178184e-10, 1.181253480023452e-10, 
1.226391265659994e-10, 1.273253838887361e-10, 1.321907106165476e-10, 1.372419492299568e-10, 1.424862036666546e-10, 1.479308493118036e-10, 1.53583543370062e-10, 1.594522356339081e-10, 1.655451796634068e-10, 1.718709443931413e-10, 1.784384261826247e-10, 1.852568613271414e-10, 1.92335839046605e-10, 1.99685314970695e-10, 2.073156251392375e-10, 2.152375005375118e-10, 2.234620821869191e-10, 2.32000936812235e-10, 2.408660731074669e-10, 2.500699586231953e-10, 2.596255372991336e-10, 2.695462476665631e-10, 2.798460417462329e-10, 2.905394046682886e-10, 3.01641375041827e-10, 3.131675661027019e-10, 3.251341876693193e-10, 3.375580689372941e-10, 3.504566821449983e-10, 3.638481671432948e-10, 3.777513569039784e-10, 3.921858040027972e-10, 4.071718081142824e-10, 4.227304445570289e-10, 4.388835939295747e-10, 4.556539728785253e-10, 4.730651660421824e-10, 4.91141659214584e-10, 5.099088737765638e-10, 5.29393202442248e-10, 5.496220463712275e-10, 5.706238536985709e-10, 5.924281595368539e-10, 6.150656275064072e-10, 6.385680928521925e-10, 6.629686072078796e-10, 6.883014850700714e-10, 7.146023520479834e-10, 7.419081949563909e-10, 7.702574138222725e-10, 7.996898758782209e-10, 8.302469716185225e-10, 8.6197167299669e-10, 8.949085938462227e-10, 9.291040526095377e-10, 9.646061374632077e-10, 1.001464773931038e-09, 1.039731795080014e-09, 1.079461014397724e-09, 1.120708301453721e-09, 1.16353166045112e-09, 1.207991311778802e-09, 1.254149776678853e-09, 1.302071965148169e-09, 1.351825267197766e-09, 1.403479647598026e-09, 1.45710774424297e-09, 1.512784970271807e-09, 1.570589620091173e-09, 1.630602979447098e-09, 1.69290943970127e-09, 1.757596616472172e-09, 1.824755472807781e-09, 1.894480447062787e-09, 1.966869585660081e-09, 2.042024680922889e-09, 2.120051414171238e-09, 2.201059504283733e-09, 2.285162861933253e-09, 2.372479749713295e-09, 2.463132948379784e-09, 2.557249929441838e-09, 2.654963034343924e-09, 2.756409660490908e-09, 2.86173245437741e-09, 2.971079512092496e-09, 3.084604587481334e-09, 
3.20246730825613e-09, 3.324833400359577e-09, 3.451874920896184e-09, 3.583770499958082e-09, 3.720705591685103e-09, 3.86287273491152e-09, 4.010471823765088e-09, 4.163710388598673e-09, 4.32280388764827e-09, 4.48797600982715e-09, 4.659458989080961e-09, 4.837493930744741e-09, 5.022331150360359e-09, 5.214230525429075e-09, 5.413461860593467e-09, 5.620305266760734e-09, 5.835051554699152e-09, 6.058002643660434e-09, 6.289471985600405e-09, 6.52978500559375e-09, 6.779279559060222e-09, 7.038306406443422e-09, 7.307229706008489e-09, 7.586427525448878e-09, 7.876292373020342e-09, 8.177231748946504e-09, 8.489668717868885e-09, 8.814042503144597e-09, 9.150809103823797e-09, 9.5004419351725e-09, 9.863432493638221e-09, 1.024029104718982e-08, 1.063154735200007e-08, 1.103775139647368e-08, 1.145947417366438e-08, 1.189730848316269e-08, 1.235186976357692e-08, 1.282379695677508e-08, 1.331375340509592e-08, 1.382242778278715e-08, 1.435053506297427e-08, 1.489881752151327e-08, 1.546804577913393e-08, 1.605901988333041e-08, 1.667257043151471e-08, 1.730955973700423e-08, 1.797088303947004e-08, 1.865746976156755e-08, 1.937028481342763e-08, 2.011032994695937e-08, 2.087864516173847e-08, 2.167631016451249e-08, 2.250444588434254e-08, 2.336421604551241e-08, 2.425682880037262e-08, 2.518353842445345e-08, 2.614564707616654e-08, 2.714450662354829e-08, 2.818152054067259e-08, 2.925814587627968e-08, 3.037589529742838e-08, 3.153633921103802e-08, 3.274110796620148e-08, 3.399189414046014e-08, 3.529045491311144e-08, 3.663861452894777e-08, 3.803826685581073e-08, 3.949137803954362e-08, 4.099998926005049e-08, 4.256621959229497e-08, 4.419226897619057e-08, 4.588042129961874e-08, 4.763304759872314e-08, 4.945260938005802e-08, 5.134166206916691e-08, 5.330285859037211e-08, 5.533895308278513e-08, 5.7452804757709e-08, 5.964738190272733e-08, 6.192576603815112e-08, 6.429115623154661e-08, 6.674687357626126e-08, 6.929636584032498e-08, 7.194321229204407e-08, 7.469112870902982e-08, 7.754397257765017e-08, 8.050574849002852e-08, 
8.358061374612415e-08, 8.677288416868774e-08, 9.008704013907718e-08, 9.35277328622685e-08, 9.709979086990839e-08, 1.008082267701194e-07, 1.046582442536976e-07, 1.086552453661209e-07, 1.128048380556284e-07, 1.171128440075795e-07, 1.215853067761289e-07, 1.262285002242788e-07, 1.310489372840026e-07, 1.360533790484808e-07, 1.412488442090423e-07, 1.466426188497674e-07, 1.522422666130608e-07, 1.580556392506303e-07, 1.640908875737247e-07, 1.703564728183576e-07, 1.768611784404674e-07, 1.836141223577551e-07, 1.906247696545817e-07, 1.979029457675683e-07, 2.054588501697867e-07, 2.133030705724682e-07, 2.214465976633409e-07, 2.299008404020732e-07, 2.386776418934717e-07, 2.477892958601336e-07, 2.572485637370613e-07, 2.670686924115144e-07, 2.772634326320874e-07, 2.878470581123197e-07, 2.988343853544828e-07, 3.10240794220776e-07, 3.220822492793383e-07, 3.343753219547587e-07, 3.471372135120677e-07, 3.603857789061625e-07, 3.7413955152853e-07, 3.884177688842803e-07, 4.032403992350519e-07, 4.186281692422773e-07, 4.346025926496226e-07, 4.511860000414499e-07, 4.684015697185583e-07, 4.862733597317947e-07, 5.048263411165354e-07, 5.240864323727627e-07, 5.440805352362998e-07, 5.64836571789129e-07, 5.863835229580742e-07, 6.087514684531721e-07, 6.319716281983099e-07, 6.560764053095311e-07, 6.810994306773378e-07, 7.070756092122224e-07, 7.340411678142331e-07, 7.620337051294538e-07, 7.910922431594582e-07, 8.21257280790343e-07, 8.525708493121888e-07, 8.850765700013524e-07, 9.188197138402665e-07, 9.538472634525431e-07, 9.902079773341895e-07, 1.027952456463317e-06, 1.06713321337538e-06, 1.107804743791727e-06, 1.150023600895167e-06, 1.193848472346321e-06, 1.239340260140195e-06, 1.286562163404613e-06, 1.335579764246216e-06, 1.386461116752271e-06, 1.439276839261605e-06, 1.49410021002128e-06, 1.551007266348346e-06, 1.610076907422233e-06, 1.671391000835599e-06, 1.735034493036632e-06, 1.801095523799931e-06, 1.869665544868031e-06, 1.940839442908764e-06, 2.014715666941202e-06, 2.091396360384642e-06, 
2.170987497892047e-06, 2.25359902713498e-06, 2.339345015709828e-06, 2.428343803343326e-06, 2.520718159579561e-06, 2.616595447136084e-06, 2.716107791123322e-06, 2.819392254327525e-06, 2.926591018762119e-06, 3.037851573701071e-06, 3.153326910411427e-06, 3.273175723811226e-06, 3.397562621283961e-06, 3.526658338887617e-06, 3.660639965204578e-06, 3.799691173084512e-06, 3.944002459538552e-06, 4.093771394053742e-06, 4.249202875599555e-06, 4.41050939860956e-06, 4.577911328227298e-06, 4.751637185111677e-06, 4.93192394010868e-06, 5.119017319099619e-06, 5.313172118347283e-06, 5.514652530667453e-06, 5.723732482762246e-06, 5.940695984058568e-06, 6.165837487404183e-06, 6.399462261980548e-06, 6.641886778800439e-06, 6.893439109166022e-06, 7.154459336470082e-06, 7.425299981730864e-06, 7.706326443260626e-06, 7.997917450871113e-06, 8.300465535029524e-06, 8.614377511385465e-06, 8.940074981093218e-06, 9.277994847362496e-06, 9.62858984867509e-06, 9.992329109111471e-06, 1.036969870623555e-05, 1.076120225698913e-05, 1.116736152205366e-05, 1.158871702913808e-05, 1.202582871565475e-05, 1.247927659124531e-05, 1.294966142062224e-05, 1.34376054271864e-05, 1.394375301788416e-05, 1.446877152976144e-05, 1.501335199866845e-05, 1.557820995056551e-05, 1.616408621586775e-05, 1.67717477672646e-05, 1.740198858143144e-05, 1.805563052504423e-05, 1.873352426548819e-05, 1.943655020663553e-05, 2.016561945004732e-05, 2.092167478193106e-05, 2.170569168615822e-05, 2.251867938361823e-05, 2.336168189815046e-05, 2.423577914926122e-05, 2.514208807178865e-05, 2.608176376263582e-05, 2.705600065464099e-05, 2.806603371759609e-05, 2.911313968637124e-05, 3.019863831602574e-05, 3.132389366372331e-05, 3.249031539718264e-05, 3.369936012931106e-05, 3.495253277857479e-05, 3.625138795455345e-05, 3.759753136801914e-05, 3.899262126475792e-05, 4.043836988221997e-05, 4.193654492794397e-05, 4.348897107855297e-05, 4.509753149794472e-05, 4.676416937313549e-05, 4.849088946601096e-05, 5.027975967904798e-05, 5.213291263283556e-05, 
5.405254725299866e-05, 5.604093036386743e-05, 5.810039828596495e-05, 6.023335843409399e-05, 6.244229091249397e-05, 6.472975010320046e-05, 6.709836624338892e-05, 6.955084698709989e-05, 7.208997894633635e-05, 7.471862920609482e-05, 7.743974680742385e-05, 8.025636419212028e-05, 8.317159860213805e-05, 8.618865342624311e-05, 8.93108194858434e-05, 9.254147625130346e-05, 9.5884092979382e-05, 9.934222976172249e-05, 0.000102919538473585, 0.000106619763611205, 0.0001104467430053252, 0.0001144044083975679, 0.0001184967858653547, 0.0001227279960801148, 0.0001271022543824488, 0.0001316238706568271, 0.0001362972489872315, 0.0001411268870739139, 0.0001461173753901688, 0.0001512733960566467, 0.0001565997214093113, 0.0001621012122356482, 0.0001677828156521532, 0.0001736495625944945, 0.0001797065648900075, 0.0001859590118803801, 0.0001924121665605076, 0.0001990713611975214, 0.0002059419923919556, 0.0002130295155408712, 0.0002203394386605423, 0.0002278773155240051, 0.0002356487380663689, 0.0002436593280083283, 0.0002519147276457326, 0.0002604205897504444, 0.0002691825665249863, 0.0002782062975506576, 0.0002874973966659629, 0.0002970614377092077, 0.0003069039390561352, 0.0003170303468804008, 0.0003274460170615634, 0.0003381561956621283, 0.000349165997891986, 0.0003604803854753931, 0.0003721041423324394, 0.0003840418484837649, 0.000396297852084134, 0.000408876239487371, 0.0004217808032421478, 0.0004350150079151947, 0.0004485819536356989, 0.0004624843372520824, 0.0004767244109898719, 0.0004913039384972425, 0.0005062241481628953, 0.0005214856835893727, 0.0005370885511037408, 0.0005530320641868381, 0.0005693147847020383, 0.0005859344608048607, 0.0006028879614157031, 0.0006201712071397544, 0.0006377790975206448, 0.0006557054345178582, 0.0006739428421024191, 0.0006924826818709238, 0.0007113149645848623, 0.0007304282575503606, 0.0007498095877631725, 0.0007694443407551385, 0.000789316155091444, 0.0008094068124831634, 0.0008296961234968131, 0.0008501618088621921, 0.000870779376401893, 
0.0008915219936305985, 0.0009123603561000265, 0.0009332625515961455, 0.0009541939203295107, 0.0009751169112973106, 0.000995990935037287, 0.001016772213039405, 0.001037413624131067, 0.001057864548206247, 0.001078070707728259, 0.001097974007500307, 0.001117512373267681, 0.001136619589790723, 0.00115522513910871, 0.001173254039801762, 0.001190626688151019, 0.00120725870219673, 0.001223060769799749, 0.001237938501924263, 0.001251792292478439, 0.00126451718617509, 0.001276002756006256, 0.001286132992063672, 0.001294786203581212, 0.001301834936225085, 0.001307145906812581, 0.001310579957799632, 0.001311992034040918, 0.001311231184492588, 0.001308140591695944, 0.001302557632049339, 0.00129431397004351, 0.001283235689801053, 0.001269143467421514, 0.001251852787787601, 0.001231174209632586, 0.001206913682801183, 0.001178872921752928, 0.001146849839454593, 0.001110639045882542, 0.001070032415402755, 0.001024819727310607, 0.0009747893837893296, 0.0009197292094794275, 0.000859427336735273, 0.0007936731804729848, 0.000722258506278369, 0.0006449785951380522, 0.0005616335077728257, 0.0004720294510817504, 0.0003759802486403701, 0.0002733089165274649, 0.0001638493449741441, 4.744808542737731e-05, -7.60337584109666e-05, -0.000206718538176717, -0.0003447099709887667, -0.0004900909575337835, -0.0006429213151008593, -0.0008032354343631272, -0.0009710398706552773, -0.001146310882614778, -0.001328991933337856, -0.001518991171645532, -0.001716178913651716, -0.001920385147562389, -0.002131397087497051, -0.002348956805087135, -0.002572758970645353, -0.002802448738779795, -0.00303761981640516, -0.003277812754133309, -0.003522513504945484, -0.003771152296794633, -0.004023102868279103, -0.004277682118681749, -0.0045341502253839, -0.004791711282830432, -0.005049514517722944, -0.005306656134820803, -0.005562181846494008, -0.005815090136850045, -0.00606433630768947, -0.006308837348572564, -0.006547477666735321, -0.006779115704312982, -0.007002591460154663, -0.007216734921291003, 
-0.007420375394715233, -0.007612351713443166, -0.007791523271747102, -0.007956781822970629, -0.008107063949431167, -0.008241364087673732, -0.008358747963892027, -0.008458366263909143, -0.008539468330035554, -0.008601415643837441, -0.00864369481992285, -0.00866592980199744, -0.008667892919522325, -0.00864951443235656, -0.008610890162994054, -0.00855228679279749, -0.008474144381560399, -0.008377075660537064, -0.008261861649677009, -0.008129443162236258, -0.007980907786368282, -0.007817471975946884, -0.007640457943968616, -0.007451265133581379, -0.007251336146086722, -0.00704211713389136, -0.006825012820663157, -0.006601336491654121, -0.00637225550434336, -0.006138733102383134, -0.005901467572353623, -0.005660830059790229, -0.005417141232685514, -0.005171806662640416, -0.004926575463078934, -0.004683225410317987, -0.004443541537885443, -0.004209292589188294, -0.003982205486649152, -0.003763938043272593, -0.003556050218419664, -0.003359974303287429, -0.00317698451292044, -0.00300816655998515, -0.002854387890228648, -0.002716269369357621, -0.002594159324532157, -0.002488110958874686, -0.002397864271954351, -0.002322833730340382, -0.002262103036614851, -0.002214428438837956, -0.002178252100917225, -0.002151727112615666, -0.002132755750555423, -0.002119042602447191, -0.00210816412944291, -0.002097656159160223, -0.002085120667430676, -0.002068353013122262, -0.002045490530360856, -0.002015183049490478, -0.00197678550610339, -0.001930448976887343, -0.001876568588579146, -0.001815558592431724, -0.001747883051367694, -0.001674054450614415, -0.001594631784266665, -0.001510218088427467, -0.00142145739464275, -0.001329031081222759, -0.001233653604825843, -0.001136067600429461, -0.001037038344641555, -0.0009373475852652302, -0.0008377867492093127, -0.0007391495512993895, -0.0006422240383165273, -0.0005477841157306845, -0.0004565806190785227, -0.0003693320077488987, -0.0002867147760370337, -0.0002093536945798891, -0.0001378120146049711, -7.25817875898982e-05, 
-1.407447375401105e-05, 3.738796598365401e-05, 8.158127865787179e-05, 0.0001183861915545481, 0.0001477944907845585, 0.0001699136294208148, 0.000184969570642679, 0.000193307555508615, 0.0001953904716806565, 0.0001917944892682877, 0.0001832016235435718, 0.0001703888823024746, 0.0001542136588398302, 0.0001355950406501938, 0.0001154907199069632, 9.486921539306192e-05, 7.467714776770995e-05, 5.580135181754153e-05, 3.902566156949172e-05, 2.498226779973356e-05, 1.40976234035417e-05, 6.532961112831748e-06, 2.1195908408711e-06, 2.892610191651546e-07, 2.775557561562891e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 2 + }, + { + "i": 1, + "radial_function": [ + 1.171126808701882e-11, 1.215877864868454e-11, 1.262338945809948e-11, 1.310575394829884e-11, 1.360655052119712e-11, 1.41264835016921e-11, 1.466628412822691e-11, 1.52267115812029e-11, 1.580855405068964e-11, 1.641262984493402e-11, 1.703978854122672e-11, 1.769091218074539e-11, 1.836691650905437e-11, 1.906875226400573e-11, 1.979740651285317e-11, 2.055390404045826e-11, 2.133930879054289e-11, 2.215472536201336e-11, 2.30013005624614e-11, 2.388022502102671e-11, 2.479273486288874e-11, 2.574011344774351e-11, 2.672369317470975e-11, 2.774485735620288e-11, 2.880504216341238e-11, 2.990573864611776e-11, 3.104849482968496e-11, 3.223491789219112e-11, 3.34666764247401e-11, 3.474550277814764e-11, 3.607319549929547e-11, 3.745162186058206e-11, 3.888272048602625e-11, 4.036850407771716e-11, 4.191106224644524e-11, 4.351256445049407e-11, 4.517526304672689e-11, 4.690149645825848e-11, 4.869369246316649e-11, 5.055437160886845e-11, 5.248615075696415e-11, 5.44917467635309e-11, 5.657398030004575e-11, 5.873577982030811e-11, 6.098018567894326e-11, 6.331035440727488e-11, 6.572956315258417e-11, 6.824121428699555e-11, 7.084884019247111e-11, 7.355610822864379e-11, 7.636682589047316e-11, 7.928494616298054e-11, 8.231457308058999e-11, 8.545996749889533e-11, 8.872555308697094e-11, 9.211592254864828e-11, 9.56358440815134e-11, 9.929026808270362e-11, 1.030843341109356e-10, 1.070233781145556e-10, 1.111129399357733e-10, 1.153587711016368e-10, 1.197668429127002e-10, 1.24343354840762e-10, 1.290947432474813e-10, 1.34027690436132e-10, 1.391491340492252e-10, 1.444662768252155e-10, 1.49986596728007e-10, 1.55717857463511e-10, 1.616681193980331e-10, 1.678457508938546e-10, 1.742594400779393e-10, 1.809182070603192e-10, 1.878314166193418e-10, 1.950087913716087e-10, 2.024604254451353e-10, 2.101967986749519e-10, 2.182287913411074e-10, 
2.265676994698038e-10, 2.352252507191703e-10, 2.442136208720222e-10, 2.535454509587903e-10, 2.63233865034699e-10, 2.73292488636193e-10, 2.837354679425565e-10, 2.945774896696793e-10, 3.058338017239312e-10, 3.175202346451942e-10, 3.296532238692007e-10, 3.422498328404729e-10, 3.55327777008375e-10, 3.689054487400073e-10, 3.830019431849739e-10, 3.976370851283951e-10, 4.128314568699079e-10, 4.286064271678714e-10, 4.449841812894545e-10, 4.619877522088683e-10, 4.796410529976081e-10, 4.979689104522241e-10, 5.169971000069348e-10, 5.367523819801379e-10, 5.572625392057925e-10, 5.785564161025861e-10, 6.00663959235788e-10, 6.236162594288511e-10, 6.474455954839363e-10, 6.721854795728414e-10, 6.978707043621464e-10, 7.245373919388093e-10, 7.522230446050183e-10, 7.809665976136958e-10, 8.108084739187836e-10, 8.417906410172971e-10, 8.739566699630221e-10, 9.073517966348474e-10, 9.42022985345846e-10, 9.780189948825086e-10, 1.015390447066993e-09, 1.054189897938718e-09, 1.094471911655415e-09, 1.136293137217473e-09, 1.179712388123442e-09, 1.224790725068661e-09, 1.271591541803227e-09, 1.320180654270026e-09, 1.370626393148069e-09, 1.422999699931234e-09, 1.477374226677453e-09, 1.533826439568497e-09, 1.592435726425977e-09, 1.653284508334605e-09, 1.716458355529618e-09, 1.782046107711255e-09, 1.850139998955286e-09, 1.920835787395243e-09, 1.994232889858522e-09, 2.070434521645563e-09, 2.149547841648581e-09, 2.231684103013674e-09, 2.316958809558126e-09, 2.405491878162619e-09, 2.497407807366551e-09, 2.592835852403383e-09, 2.691910206921869e-09, 2.794770191648588e-09, 2.901560450256787e-09, 3.012431152716701e-09, 3.127538206413125e-09, 3.247043475326686e-09, 3.371115007586895e-09, 3.499927271716502e-09, 3.633661401899093e-09, 3.772505452614474e-09, 3.916654662999387e-09, 4.06631173130506e-09, 4.221687099837001e-09, 4.382999250777164e-09, 4.550475013304166e-09, 4.724349882442601e-09, 4.904868350089525e-09, 5.092284248682772e-09, 5.286861107993769e-09, 5.488872525545987e-09, 5.698602551178772e-09, 
5.916346086297179e-09, 6.142409298367604e-09, 6.377110051241751e-09, 6.620778351912915e-09, 6.873756814331403e-09, 7.136401140930969e-09, 7.409080622541283e-09, 7.692178657388859e-09, 7.986093289914636e-09, 8.291237770164144e-09, 8.608041134536205e-09, 8.936948808703978e-09, 9.278423233555473e-09, 9.632944515031399e-09, 1.000101109877184e-08, 1.038314047051932e-08, 1.077986988325955e-08, 1.119175711212117e-08, 1.161938123809281e-08, 1.206334346165651e-08, 1.25242679474799e-08, 1.300280270135023e-08, 1.349962048058126e-08, 1.40154197391694e-08, 1.455092560902368e-08, 1.510689091864684e-08, 1.568409725069386e-08, 1.6283356039892e-08, 1.690550971286086e-08, 1.755143287142925e-08, 1.822203352110921e-08, 1.891825434644659e-08, 1.964107403503677e-08, 2.039150865206049e-08, 2.117061306726417e-08, 2.197948243638626e-08, 2.281925373910175e-08, 2.369110737564122e-08, 2.459626882431933e-08, 2.553601036229316e-08, 2.651165285196171e-08, 2.752456759550483e-08, 2.857617826015956e-08, 2.966796287692814e-08, 3.080145591551163e-08, 3.197825043838533e-08, 3.320000033699997e-08, 3.44684226532875e-08, 3.578529998964573e-08, 3.715248301086818e-08, 3.8571893041398e-08, 4.004552476168442e-08, 4.157544900723921e-08, 4.316381567448038e-08, 4.481285673728342e-08, 4.652488937852513e-08, 4.830231924098888e-08, 5.014764380217289e-08, 5.2063455877676e-08, 5.405244725810341e-08, 5.611741248456411e-08, 5.826125276797229e-08, 6.048698005767576e-08, 6.279772126514296e-08, 6.51967226484059e-08, 6.768735436364049e-08, 7.027311518998055e-08, 7.295763743432926e-08, 7.574469202290716e-08, 7.863819378668935e-08, 8.16422069480404e-08, 8.476095081620925e-08, 8.799880569966785e-08, 9.136031904344833e-08, 9.485021179998009e-08, 9.847338504256495e-08, 1.022349268301933e-07, 1.061401193337548e-07, 1.101944462332708e-07, 1.144036003964921e-07, 1.187734918494526e-07, 1.233102560502218e-07, 1.280202624772528e-07, 1.329101235441033e-07, 1.379867038531887e-07, 1.432571298011493e-07, 1.487287995492573e-07, 
1.544093933727295e-07, 1.603068844031067e-07, 1.664295497788589e-07, 1.727859822193037e-07, 1.793851020382353e-07, 1.862361696134852e-07, 1.933487983299988e-07, 2.007329680141428e-07, 2.083990388778079e-07, 2.163577659915846e-07, 2.246203143069353e-07, 2.331982742483676e-07, 2.421036778964917e-07, 2.513490157850084e-07, 2.609472543341994e-07, 2.709118539451398e-07, 2.812567877796456e-07, 2.919965612513992e-07, 3.031462322555016e-07, 3.147214321639428e-07, 3.267383876158083e-07, 3.392139431322921e-07, 3.521655845873502e-07, 3.656114635662873e-07, 3.795704226452525e-07, 3.940620216270992e-07, 4.091065647682248e-07, 4.247251290346834e-07, 4.409395934256602e-07, 4.577726694041826e-07, 4.752479324768549e-07, 4.933898549655035e-07, 5.122238400151516e-07, 5.31776256884839e-07, 5.520744775690197e-07, 5.731469147992544e-07, 5.95023061477974e-07, 6.177335315971406e-07, 6.413101026979485e-07, 6.657857599281476e-07, 6.911947417573253e-07, 7.175725874109219e-07, 7.449561860877339e-07, 7.7338382802653e-07, 8.028952574905853e-07, 8.335317277417453e-07, 8.653360580768698e-07, 8.983526930037969e-07, 9.326277636356686e-07, 9.682091513857917e-07, 1.005146554047565e-06, 1.04349155434844e-06, 1.08329769106767e-06, 1.124620532814305e-06, 1.167517754560953e-06, 1.212049217036739e-06, 1.258277049083011e-06, 1.306265733081526e-06, 1.356082193566673e-06, 1.407795889139286e-06, 1.461478907801847e-06, 1.517206065840923e-06, 1.575055010385233e-06, 1.635106325774051e-06, 1.69744364387418e-06, 1.762153758488983e-06, 1.829326744007811e-06, 1.899056078449617e-06, 1.971438771059785e-06, 2.046575494624354e-06, 2.124570722671591e-06, 2.205532871737807e-06, 2.289574448878527e-06, 2.376812204612919e-06, 2.467367291497633e-06, 2.561365428529859e-06, 2.658937071587877e-06, 2.760217590125418e-06, 2.865347450339753e-06, 2.97447240504597e-06, 3.08774369049219e-06, 3.205318230362987e-06, 3.32735884722339e-06, 3.454034481664652e-06, 3.585520419423434e-06, 3.721998526750996e-06, 3.863657494323923e-06, 
4.010693089989203e-06, 4.163308420654476e-06, 4.321714203637691e-06, 4.486129047802773e-06, 4.656779744818938e-06, 4.833901570890647e-06, 5.017738599315755e-06, 5.208544024240948e-06, 5.406580495995497e-06, 5.612120468394374e-06, 5.825446558414227e-06, 6.046851918658516e-06, 6.276640623038409e-06, 6.515128066110958e-06, 6.762641376527046e-06, 7.019519845054285e-06, 7.286115367655571e-06, 7.562792904114231e-06, 7.849930952712765e-06, 8.147922041484556e-06, 8.457173236573082e-06, 8.778106668244995e-06, 9.111160075121303e-06, 9.456787367201292e-06, 9.815459208271005e-06, 1.018766361830302e-05, 1.057390659646496e-05, 1.097471276537427e-05, 1.139062603724854e-05, 1.182221030261307e-05, 1.227005014224743e-05, 1.273475156306287e-05, 1.321694275861745e-05, 1.371727489499083e-05, 1.423642292275494e-05, 1.477508641578688e-05, 1.533399043768839e-05, 1.591388643658427e-05, 1.651555316908756e-05, 1.713979765422796e-05, 1.778745615815403e-05, 1.845939521042615e-05, 1.91565126527281e-05, 1.987973872083386e-05, 2.063003716067179e-05, 2.14084063793339e-05, 2.221588063188305e-05, 2.305353124481357e-05, 2.392246787702076e-05, 2.482383981913529e-05, 2.57588373320736e-05, 2.672869302565308e-05, 2.773468327810845e-05, 2.87781296973406e-05, 2.986040062471375e-05, 3.098291268219638e-05, 3.214713236362786e-05, 3.335457767086451e-05, 3.460681979553157e-05, 3.590548484707631e-05, 3.725225562777816e-05, 3.864887345533298e-05, 4.009714003357389e-05, 4.159891937184758e-05, 4.315613975349135e-05, 4.477079575379998e-05, 4.644495030779175e-05, 4.818073682799502e-05, 4.998036137239564e-05, 5.184610486257548e-05, 5.378032535196231e-05, 5.578546034399584e-05, 5.786402915987026e-05, 6.001863535538e-05, 6.225196918622841e-05, 6.456681012098942e-05, 6.696602940072496e-05, 6.945259264405624e-05, 7.202956249626758e-05, 7.47001013207827e-05, 7.746747393109345e-05, 8.033505036094761e-05, 8.330630867029483e-05, 8.63848377841733e-05, 8.95743403613684e-05, 9.287863568929789e-05, 9.630166260118442e-05, 
9.984748241113087e-05, 0.000103520281862269, 0.0001073243760826371, 0.000111264211542928, 0.0001153443690096704, 0.0001195695664868003, 0.0001239446621379397, 0.0001284746571809916, 0.00013316469874594, 0.0001380200826859421, 0.000143046256330964, 0.0001482488211723325, 0.0001536335354656017, 0.0001592063167381485, 0.000164973244186823, 0.0001709405609498495, 0.0001771146762359616, 0.0001835021672924851, 0.0001901097811927133, 0.0001969444364214993, 0.0002040132242364539, 0.000211323409780558, 0.0002188824329202779, 0.0002266979087814973, 0.0002347776279536982, 0.000243129556330809, 0.0002517618345550792, 0.0002606827770281057, 0.0002699008704508306, 0.0002794247718519041, 0.0002892633060612289, 0.0002994254625828475, 0.0003099203918185117, 0.0003207574005903341, 0.0003319459469078652, 0.0003434956339217027, 0.0003554162030024243, 0.0003677175258801268, 0.0003804095957762153, 0.0003935025174553407, 0.0004070064961214299, 0.0004209318250777189, 0.000435288872066476, 0.0004500880641997698, 0.000465339871388144, 0.0004810547881694476, 0.0004972433138353364, 0.000513915930748088, 0.0005310830807353885, 0.0005487551394457093, 0.0005669423885416581, 0.0005856549856035298, 0.0006049029316099163, 0.0006246960358569215, 0.0006450438781721832, 0.0006659557682745319, 0.0006874407021248321, 0.0007095073151083076, 0.0007321638318835055, 0.0007554180127281168, 0.0007792770962070139, 0.0008037477379834123, 0.0008288359455897503, 0.0008545470089710511, 0.0008808854266100912, 0.0009078548270407341, 0.0009354578855535285, 0.000963696235895957, 0.0009925703767689029, 0.001022079572920944, 0.001052221750643096, 0.0010829933874689, 0.00111438939588818, 0.001146403000887718, 0.001179025611138682, 0.001212246683658833, 0.001246053581787925, 0.001280431426327047, 0.00131536293970752, 0.001350828283072391, 0.001386804886173827, 0.001423267270013175, 0.001460186862177235, 0.001497531804854792, 0.001535266755551981, 0.00157335268056383, 0.001611746641302872, 0.001650401573634124, 
0.001689266060419628, 0.001728284097535356, 0.001767394853688955, 0.001806532424439175, 0.001845625580896925, 0.0018845975136745, 0.001923365572743784, 0.001961841003966673, 0.001999928683171944, 0.002037526848772727, 0.002074526834047897, 0.002110812800349625, 0.002146261472647992, 0.002180741878982697, 0.00221411509556119, 0.002246233999422574, 0.002276943030777248, 0.002306077967333472, 0.002333465713133824, 0.002358924104646529, 0.00238226173708856, 0.002403277814198695, 0.00242176202492863, 0.002437494450777913, 0.002450245507762681, 0.002459775927277544, 0.002465836780382899, 0.002468169550324268, 0.002466506258363759, 0.00246056964827363, 0.00245007343510513, 0.002434722624098508, 0.002414213905838593, 0.002388236133979636, 0.002356470892058317, 0.00231859315607915, 0.002274272059685371, 0.002223171768814141, 0.00216495247276935, 0.002099271498620503, 0.00202578455574262, 0.001944147117140188, 0.001854015943937306, 0.001755050759054799, 0.001646916075621834, 0.001529283185071283, 0.001401832309131934, 0.001264254919043429, 0.001116256224266525, 0.0009575578317294127, 0.0007879005752247498, 0.0006070475129388222, 0.0004147870892403707, 0.0002109364547686529, -4.65506347167395e-06, -0.0002321023528889369, -0.000471480780685908, -0.0007228223506918561, -0.0009861117718807954, -0.00126128246246692, -0.001548212517385888, -0.001846720671137636, -0.002156562292364603, -0.002477425451144114, -0.002808927104749229, -0.003150609452537455, -0.003501936515606723, -0.003862291001846224, -0.004230971521931354, -0.004607190226570545, -0.004990070939804185, -0.005378647867257162, -0.005771864961817198, -0.006168576032096498, -0.006567545681056047, -0.006967451163141956, -0.0073668852479899, -0.007764360176971108, -0.008158312795343592, -0.008547110937278581, -0.008929061133295767, -0.009302417699392322, -0.009665393254122254, -0.01001617069381473, -0.01035291663676335, -0.01067379632435153, -0.01097698994051291, -0.01126071028051714, -0.01152322166574373, 
-0.011762859962854, -0.01197805352369134, -0.01216734481653975, -0.01232941247039807, -0.01246309340218631, -0.01256740464297914, -0.01264156442435163, -0.01268501203084469, -0.01269742587078124, -0.01267873916682755, -0.01262915262171693, -0.01254914337564929, -0.01243946954255621, -0.01230116959547387, -0.01213555586975687, -0.01194420147009615, -0.0117289199067568, -0.01149173685172888, -0.01123485350021079, -0.01096060115055133, -0.01067138677975815, -0.01036962959481476, -0.01005768878459631, -0.009737782984541223, -0.009411902296695328, -0.00908171408015743, -0.008748464138430341, -0.008412875375753876, -0.008075549726313642, -0.007738655384473636, -0.007404833849729, -0.007076730224218869, -0.006756958108850494, -0.006448061832511065, -0.006152476387813785, -0.005872485553327892, -0.00561017879717018, -0.00536740767913272, -0.005145742596696127, -0.004946430852365628, -0.004770357153334594, -0.004618007786456785, -0.004489439838293179, -0.00438425694733997, -0.004301593178471678, -0.004240106692654753, -0.004197984941812026, -0.004172963142587513, -0.004162357766326591, -0.004163116718047699, -0.004171887756560605, -0.004185106522987148, -0.004199105287808959, -0.004210243189680871, -0.004215058315776364, -0.004210441457908963, -0.004193830767089166, -0.004163425819844177, -0.004118418803535043, -0.004059080761561584, -0.003986017634266675, -0.003899874340321061, -0.003801374038449004, -0.003691315258495753, -0.003570568221012066, -0.003440070320604963, -0.003300820755957612, -0.003153874298301246, -0.003000334200266144, -0.002841344258469658, -0.002678080055934992, -0.002511739424436943, -0.002343532182106381, -0.002174669217988728, -0.002006351012618285, -0.001839755701856627, -0.001676026810013678, -0.001516260797335633, -0.001361494585941841, -0.001212693246836927, -0.001070738048205427, -0.000936415081327302, -0.0008104046945281995, -0.0006932719769825457, -0.0005854585422790937, -0.0004872758657187559, -0.0003989004286761855, -0.0003203709172881208, 
-0.000251587710506429, -0.0001923148735223923, -0.0001421848459793873, -0.0001007059796620247, -6.727303685100106e-05, -4.118070770936511e-05, -2.164014255334745e-05, -7.798422224689072e-06, 1.239193042831821e-06, 6.384488916058584e-06, 8.539352097519659e-06, 8.567123341504668e-06, 7.262837332752681e-06, 5.323066482626482e-06, 3.316219252744168e-06, 1.654280374047135e-06, 5.67117358782343e-07, 8.061171560824754e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 1, + "j": 3 + }, + 
{ + "i": 2, + "radial_function": [ + 5.975460881527213e-10, 6.126730916634328e-10, 6.281830397711435e-10, 6.440856269018821e-10, 6.603907929042902e-10, 6.771087292628788e-10, 6.942498854686025e-10, 7.118249755507284e-10, 7.298449847740625e-10, 7.483211765057695e-10, 7.672650992560123e-10, 7.86688593896881e-10, 8.0660380106408e-10, 8.270231687460148e-10, 8.479594600650393e-10, 8.694257612556921e-10, 8.914354898449561e-10, 9.140024030396204e-10, 9.371406063260068e-10, 9.608645622874428e-10, 9.851890996449709e-10, 1.010129422526975e-09, 1.035701119973514e-09, 1.061920175681282e-09, 1.088802977995339e-09, 1.116366330153807e-09, 1.144627460791966e-09, 1.173604034712324e-09, 1.203314163927386e-09, 1.233776418982025e-09, 1.265009840562542e-09, 1.297033951399683e-09, 1.329868768473048e-09, 1.36353481552451e-09, 1.398053135888531e-09, 1.433445305647297e-09, 1.469733447119017e-09, 1.506940242687728e-09, 1.545088948983319e-09, 1.58420341142061e-09, 1.624308079106589e-09, 1.665428020125135e-09, 1.70758893720879e-09, 1.750817183807388e-09, 1.795139780563586e-09, 1.840584432205566e-09, 1.887179544867599e-09, 1.934954243849127e-09, 1.983938391823641e-09, 2.034162607508671e-09, 2.085658284808498e-09, 2.138457612441781e-09, 2.192593594066106e-09, 2.248100068912262e-09, 2.305011732941103e-09, 2.363364160536083e-09, 2.423193826745349e-09, 2.484538130086992e-09, 2.547435415931912e-09, 2.611925000478903e-09, 2.678047195336918e-09, 2.745843332729933e-09, 2.81535579134019e-09, 2.886628022805976e-09, 2.959704578890548e-09, 3.034631139339152e-09, 3.111454540441696e-09, 3.190222804318776e-09, 3.270985168949616e-09, 3.353792118960489e-09, 3.438695417193075e-09, 3.525748137072433e-09, 3.615004695794827e-09, 3.706520888356363e-09, 3.800353922443465e-09, 3.896562454207297e-09, 3.995206624944358e-09, 4.096348098706391e-09, 4.200050100862964e-09, 4.306377457641068e-09, 4.415396636666322e-09, 4.527175788531374e-09, 4.641784789417359e-09, 4.759295284795217e-09, 4.879780734234276e-09, 
5.003316457346051e-09, 5.129979680892188e-09, 5.259849587085954e-09, 5.393007363117721e-09, 5.529536251935224e-09, 5.669521604310648e-09, 5.813050932227059e-09, 5.960213963617745e-09, 6.111102698492628e-09, 6.265811466487147e-09, 6.424436985869474e-09, 6.587078424043205e-09, 6.753837459583515e-09, 6.92481834584547e-09, 7.100127976184706e-09, 7.27987595083095e-09, 7.464174645456922e-09, 7.653139281485022e-09, 7.846887998176354e-09, 8.045541926547264e-09, 8.249225265159386e-09, 8.45806535783147e-09, 8.67219277332105e-09, 8.891741387026492e-09, 9.11684846476056e-09, 9.347654748647797e-09, 9.584304545200272e-09, 9.826945815626559e-09, 1.007573026843071e-08, 1.033081345435961e-08, 1.059235486375819e-08, 1.086051802639371e-08, 1.113547061381188e-08, 1.141738454428924e-08, 1.170643609044754e-08, 1.200280598959798e-08, 1.230667955688471e-08, 1.261824680129857e-08, 1.293770254463432e-08, 1.326524654346582e-08, 1.360108361421641e-08, 1.394542376140265e-08, 1.429848230913266e-08, 1.466048003594142e-08, 1.503164331304835e-08, 1.541220424612371e-08, 1.580240082065368e-08, 1.620247705099536e-08, 1.661268313321566e-08, 1.703327560181083e-08, 1.746451749040435e-08, 1.790667849652593e-08, 1.836003515057458e-08, 1.882487098907255e-08, 1.930147673231957e-08, 1.979015046655967e-08, 2.029119783077484e-08, 2.080493220822487e-08, 2.133167492285262e-08, 2.187175544068042e-08, 2.242551157632364e-08, 2.299328970475276e-08, 2.357544497843748e-08, 2.417234155001033e-08, 2.478435280059066e-08, 2.541186157391323e-08, 2.605526041641044e-08, 2.671495182339879e-08, 2.739134849152724e-08, 2.808487357764645e-08, 2.879596096426258e-08, 2.952505553174513e-08, 3.027261343746099e-08, 3.103910240201126e-08, 3.182500200275402e-08, 3.26308039747971e-08, 3.345701251965517e-08, 3.430414462176425e-08, 3.517273037305742e-08, 3.60633133058068e-08, 3.697645073394291e-08, 3.791271410307195e-08, 3.887268934940969e-08, 3.985697726786407e-08, 4.086619388949951e-08, 4.190097086862397e-08, 4.296195587974697e-08, 
4.404981302465985e-08, 4.51652232499005e-08, 4.630888477486834e-08, 4.748151353086356e-08, 4.86838436113319e-08, 4.991662773360283e-08, 5.118063771241714e-08, 5.247666494554813e-08, 5.380552091182655e-08, 5.516803768189168e-08, 5.656506844199256e-08, 5.799748803118007e-08, 5.946619349223415e-08, 6.097210463667798e-08, 6.251616462424904e-08, 6.40993405571945e-08, 6.572262408977797e-08, 6.738703205339075e-08, 6.909360709766969e-08, 7.084341834803955e-08, 7.263756208010126e-08, 7.447716241130675e-08, 7.636337201036825e-08, 7.82973728248577e-08, 8.028037682747772e-08, 8.231362678148091e-08, 8.439839702573944e-08, 8.653599427997889e-08, 8.872775847069329e-08, 9.097506357829194e-08, 9.327931850602112e-08, 9.564196797123433e-08, 9.806449341959666e-08, 1.005484139628141e-07, 1.030952873405178e-07, 1.057067109069176e-07, 1.083843226428907e-07, 1.111298021941612e-07, 1.139448719362586e-07, 1.16831298066962e-07, 1.197908917269477e-07, 1.228255101493837e-07, 1.259370578392378e-07, 1.291274877830721e-07, 1.323988026901449e-07, 1.357530562656294e-07, 1.391923545168098e-07, 1.427188570931253e-07, 1.463347786609543e-07, 1.500423903140699e-07, 1.538440210207003e-07, 1.577420591081813e-07, 1.617389537861923e-07, 1.658372167096041e-07, 1.700394235820052e-07, 1.743482158009753e-07, 1.787663021462394e-07, 1.832964605118459e-07, 1.879415396835377e-07, 1.927044611625517e-07, 1.975882210370701e-07, 2.025958919026295e-07, 2.077306248327874e-07, 2.129956514014188e-07, 2.183942857580291e-07, 2.239299267575237e-07, 2.296060601459065e-07, 2.354262608034358e-07, 2.413941950467845e-07, 2.475136229918353e-07, 2.537884009787392e-07, 2.602224840609627e-07, 2.668199285600653e-07, 2.735848946880069e-07, 2.805216492388545e-07, 2.876345683517833e-07, 2.949281403473495e-07, 3.024069686390593e-07, 3.100757747223125e-07, 3.179394012428737e-07, 3.260028151470798e-07, 3.34271110916059e-07, 3.427495138863118e-07, 3.514433836590606e-07, 3.603582176008704e-07, 3.694996544380863e-07, 3.788734779477462e-07, 
3.884856207476848e-07, 3.983421681886298e-07, 4.084493623511888e-07, 4.188136061507063e-07, 4.294414675530596e-07, 4.403396839045688e-07, 4.515151663792783e-07, 4.62975004546987e-07, 4.747264710654999e-07, 4.867770265006758e-07, 4.991343242779866e-07, 5.118062157693763e-07, 5.248007555193828e-07, 5.381262066145641e-07, 5.517910462004211e-07, 5.658039711501567e-07, 5.801739038897256e-07, 5.949099983837809e-07, 6.100216462872972e-07, 6.255184832677596e-07, 6.414103955030315e-07, 6.577075263601034e-07, 6.744202832601824e-07, 6.915593447356906e-07, 7.091356676849736e-07, 7.271604948306997e-07, 7.456453623881094e-07, 7.646021079495461e-07, 7.840428785918045e-07, 8.03980139213215e-07, 8.244266811074504e-07, 8.45395630781404e-07, 8.66900459024687e-07, 8.889549902385667e-07, 9.115734120324202e-07, 9.347702850961253e-07, 9.585605533569856e-07, 9.82959554430238e-07, 1.00798303037237e-06, 1.033647138746918e-06, 1.059968464012676e-06, 1.086964029244645e-06, 1.11465130819843e-06, 1.143048237729132e-06, 1.172173230576247e-06, 1.202045188526442e-06, 1.232683515966583e-06, 1.264108133839725e-06, 1.296339494017402e-06, 1.329398594101933e-06, 1.36330699267298e-06, 1.398086824993196e-06, 1.43376081918826e-06, 1.470352312917283e-06, 1.507885270550041e-06, 1.546384300868253e-06, 1.585874675308676e-06, 1.626382346766504e-06, 1.667933968978293e-06, 1.710556916504324e-06, 1.754279305331143e-06, 1.799130014115767e-06, 1.845138706093933e-06, 1.892335851675599e-06, 1.940752751751881e-06, 1.990421561738458e-06, 2.041375316381618e-06, 2.093647955354011e-06, 2.147274349668313e-06, 2.20229032893824e-06, 2.258732709517281e-06, 2.316639323547071e-06, 2.376049048948248e-06, 2.43700184038838e-06, 2.499538761262564e-06, 2.563702016724053e-06, 2.629534987803653e-06, 2.697082266658191e-06, 2.766389692990206e-06, 2.837504391682549e-06, 2.910474811693505e-06, 2.985350766259984e-06, 3.062183474458147e-06, 3.141025604173183e-06, 3.221931316531826e-06, 3.304956311853648e-06, 3.390157877179578e-06, 
3.477594935438285e-06, 3.567328096314095e-06, 3.659419708882366e-06, 3.753933916081438e-06, 3.850936711092946e-06, 3.950495995705635e-06, 4.052681640740821e-06, 4.157565548621148e-06, 4.26522171816787e-06, 4.375726311715435e-06, 4.489157724636069e-06, 4.605596657371278e-06, 4.725126190071075e-06, 4.847831859946503e-06, 4.973801741445469e-06, 5.103126529366551e-06, 5.235899625031216e-06, 5.372217225639138e-06, 5.512178416937713e-06, 5.655885269342349e-06, 5.80344293764997e-06, 5.95495976449491e-06, 6.110547387702559e-06, 6.270320851703566e-06, 6.434398723178299e-06, 6.602903211108702e-06, 6.775960291423266e-06, 6.953699836428511e-06, 7.136255749229294e-06, 7.323766103349556e-06, 7.516373287774115e-06, 7.714224157642622e-06, 7.91747019083694e-06, 8.126267650714002e-06, 8.340777755247751e-06, 8.561166852855574e-06, 8.787606605197257e-06, 9.020274177247317e-06, 9.259352434955146e-06, 9.505030150822097e-06, 9.757502217738985e-06, 1.001696987144329e-05, 1.028364092197168e-05, 1.055772999450077e-05, 1.0839458779986e-05, 1.112905629602823e-05, 1.142675915841621e-05, 1.173281186381398e-05, 1.204746708408336e-05, 1.237098597275397e-05, 1.270363848417612e-05, 1.304570370591684e-05, 1.339747020498404e-05, 1.375923638849083e-05, 1.413131087939925e-05, 1.451401290801199e-05, 1.490767271991047e-05, 1.53126320010698e-05, 1.572924432091341e-05, 1.615787559410498e-05, 1.659890456191121e-05, 1.7052723294006e-05, 1.751973771162629e-05, 1.800036813303041e-05, 1.849504984225224e-05, 1.900423368218909e-05, 1.952838667310705e-05, 2.006799265769699e-05, 2.062355297386321e-05, 2.119558715648113e-05, 2.178463366941222e-05, 2.239125066912526e-05, 2.301601680132864e-05, 2.365953203208287e-05, 2.432241851492516e-05, 2.50053214956063e-05, 2.570891025610848e-05, 2.643387909968599e-05, 2.718094837874647e-05, 2.795086556746789e-05, 2.874440638112865e-05, 2.956237594421226e-05, 3.04056100094374e-05, 3.127497622995354e-05, 3.217137548703872e-05, 3.309574327573341e-05, 3.404905115094743e-05, 
3.503230823668134e-05, 3.604656280111446e-05, 3.709290390042427e-05, 3.817246309432057e-05, 3.928641623639706e-05, 4.043598534253148e-05, 4.162244054069351e-05, 4.28471021056538e-05, 4.411134258222683e-05, 4.541658900082204e-05, 4.676432518922575e-05, 4.815609418468763e-05, 4.959350075054073e-05, 5.107821400174669e-05, 5.261197014391802e-05, 5.419657533054572e-05, 5.583390864332553e-05, 5.752592520066046e-05, 5.927465939959615e-05, 6.108222829662819e-05, 6.29508351330196e-05, 6.48827730104462e-05, 6.688042872299716e-05, 6.894628675174788e-05, 7.108293342832906e-05, 7.329306127412147e-05, 7.557947352190889e-05, 7.794508882703314e-05, 8.039294617530346e-05, 8.292620999512036e-05, 8.554817548148774e-05, 8.826227413979024e-05, 9.107207955742457e-05, 9.39813134115736e-05, 9.699385172161156e-05, 0.0001001137313548328, 0.0001033451567943701, 0.000106692507178368, 0.0001101603436196351, 0.0001137534168151704, 0.0001174766749551081, 0.0001213352719407555, 0.0001253345759215281, 0.00012948017816068, 0.0001337779022398075, 0.0001382338136121729, 0.0001428542295149046, 0.000147645729250155, 0.0001526151648452444, 0.0001577696721017641, 0.0001631166820434966, 0.0001686639327728623, 0.0001744194817453883, 0.0001803917184714595, 0.0001865893776542678, 0.0001930215527725232, 0.0001996977101160234, 0.0002066277032816549, 0.0002138217881367948, 0.0002212906382563677, 0.0002290453608390341, 0.0002370975131070658, 0.0002454591191934483, 0.0002541426875186208, 0.0002631612286579689, 0.0002725282736997909, 0.0002822578930918748, 0.0002923647159730898, 0.0003028639499845008, 0.0003137714015523937, 0.0003251034966333311, 0.0003368773019088259, 0.000349110546414498, 0.0003618216435855949, 0.0003750297136975223, 0.0003887546066765446, 0.0004030169252520046, 0.0004178380484173324, 0.0004332401551627171, 0.0004492462484375338, 0.0004658801792955874, 0.0004831666711707092, 0.0005011313442244417, 0.0005198007397012993, 0.0005392023442203859, 0.0005593646139251381, 0.0005803169984053389, 
0.0006020899642976002, 0.0006247150184620065, 0.0006482247306236475, 0.0006726527553583241, 0.0006980338532917199, 0.0007244039113708554, 0.0007517999620556482, 0.0007802602012668676, 0.0008098240049147691, 0.0008405319438201315, 0.0008724257968263771, 0.0009055485618879733, 0.0009399444649062675, 0.0009756589660695779, 0.001012738763439502, 0.001051231793510235, 0.001091187228452302, 0.001132655469736277, 0.001175688137816335, 0.001220338057537391, 0.001266659238913721, 0.001314706852911054, 0.001364537201848549, 0.001416207684021863, 0.00146977675213381, 0.001525303865105057, 0.001582849432824352, 0.001642474753385564, 0.001704241942348383, 0.001768213853550264, 0.001834453990990066, 0.001903026411298771, 0.001973995616309983, 0.002047426435243312, 0.002123383896016951, 0.00220193308521291, 0.002283138996229259, 0.002367066365169121, 0.002453779494036521, 0.00254334206083471, 0.002635816916194106, 0.002731265866194772, 0.002829749441092876, 0.002931326649712751, 0.003036054719325715, 0.003143988820905087, 0.003255181779723802, 0.003369683771346931, 0.00348754200316755, 0.00360880038173978, 0.003733499166279003, 0.003861674608825505, 0.003993358581704583, 0.004128578193063513, 0.004267355391422999, 0.004409706560348091, 0.00455564210451999, 0.004705166028674775, 0.0048582755110677, 0.005014960473319298, 0.005175203148702131, 0.00533897765113149, 0.005506249547327175, 0.005676975434814845, 0.005851102528629216, 0.006028568259765364, 0.006209299888593084, 0.006393214136597833, 0.006580216839935491, 0.006770202628379017, 0.006963054633288453, 0.007158644228242475, 0.007356830805922492, 0.00755746159473111, 0.007760371518445146, 0.007965383101941495, 0.008172306425681024, 0.008380939131181674, 0.008591066479147718, 0.008802461461236788, 0.009014884965631558, 0.00922808599562996, 0.009441801939368857, 0.009655758887546268, 0.009869671994601453, 0.01008324587725062, 0.01029617504255943, 0.01050814433586876, 0.0107188293968871, 0.01092789711013751, 0.01113500603372169, 
0.01133980678806751, 0.01154194238399646, 0.01174104846713155, 0.01193675345341887, 0.01212867852842691, 0.01231643748119432, 0.01249963634181146, 0.01267787279074891, 0.01285073530730342, 0.01301780202455192, 0.01317863925902863, 0.0133327996851272, 0.01347982012714403, 0.01361921894609488, 0.01375049300413619, 0.01387311419678376, 0.01398652555232439, 0.01409013690902692, 0.01418332019412905, 0.0142654043442288, 0.01433566992473324, 0.01439338676009138, 0.01443796253918846, 0.01446885437510783, 0.0144855343107321, 0.01448749249154847, 0.0144742404965952, 0.01444531482077133, 0.0144002804997924, 0.0143387348669685, 0.01426031142871785, 0.01416468384330507, 0.01405156998469585, 0.01392073607069443, 0.01377200083163599, 0.01360523969289028, 0.01342038894131095, 0.013217449842514, 0.01299649267258092, 0.01275766062441454, 0.01250117354560537, 0.01222733146133242, 0.01193651783252089, 0.01162920249633459, 0.01130594423307878, 0.01096739290082778, 0.01061429107666001, 0.01024747514128668, 0.009867875742286716, 0.009476517570100373, 0.00907451838055362, 0.008663087198083744, 0.008243521635076537, 0.007817204265018418, 0.007385597990544723, 0.006950240352122744, 0.006512736729177349, 0.006074752393026822, 0.005638003380278617, 0.005204246166382515, 0.004775266132021477, 0.00435286483007441, 0.003938846078029912, 0.003535000920137449, 0.00314309152521014, 0.002764834109884873, 0.002401881003280454, 0.00205580199719449, 0.00172806515619453, 0.001420017293849996, 0.001132864354675384, 0.000867651975734185, 0.000625246536737345, 0.0004063170424460916, 0.00021131821551712, 4.047521101488427e-05, -0.0001062296050816924, -0.0002290673427493284, -0.0003285702447563821, -0.0004055357976942009, -0.0004610267662981116, -0.0004963666111035547, -0.0005131297474727792, -0.0005131261099245166, -0.0004983795006077707, -0.000471099225711813, -0.0004336445598795979, -0.0003884816276605862, -0.0003381323542647197, -0.0002851152171476634, -0.0002318776272919876, -0.0001807198866849435, 
-0.0001337108090132544, -9.259525670568536e-05, -5.869404225083974e-05, -3.279686834498086e-05, -1.504924324233214e-05, -4.834608033965626e-06, -6.532546449611321e-07, -5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 2 + }, + { + "i": 2, + "radial_function": [ + 2.430228051275159e-15, 2.554828487682692e-15, 2.685817323498554e-15, 2.823522098841808e-15, 2.968287147162991e-15, 3.120474456255856e-15, 3.280464573414104e-15, 3.448657556996467e-15, 
3.625473976779404e-15, 3.811355965599069e-15, 4.006768324911848e-15, 4.212199687038248e-15, 4.428163736996152e-15, 4.655200496978733e-15, 4.893877676688895e-15, 5.144792092906575e-15, 5.40857116183887e-15, 5.685874467984366e-15, 5.977395413434715e-15, 6.283862951737674e-15, 6.606043410656817e-15, 6.944742408386148e-15, 7.300806868010894e-15, 7.675127135251663e-15, 8.068639204787541e-15, 8.482327060724791e-15, 8.917225137063898e-15, 9.37442090431704e-15, 9.855057588744004e-15, 1.03603370310061e-14, 1.089152269138576e-14, 1.144994280908698e-14, 1.203699372351595e-14, 1.265414336584697e-14, 1.330293492960454e-14, 1.398499072943925e-14, 1.470201625774716e-14, 1.545580444927585e-14, 1.624824016438068e-14, 1.708130490214223e-14, 1.795708175512905e-14, 1.887776061819635e-14, 1.98456436643443e-14, 2.086315110132873e-14, 2.193282722341889e-14, 2.305734677343358e-14, 2.423952163096559e-14, 2.548230784351688e-14, 2.678881301812677e-14, 2.816230409197546e-14, 2.960621550139297e-14, 3.11241577697011e-14, 3.271992653536109e-14, 3.439751204300245e-14, 3.616110912106582e-14, 3.801512767100673e-14, 3.99642036942913e-14, 4.201321088475487e-14, 4.416727281531044e-14, 4.643177574948102e-14, 4.881238210978812e-14, 5.131504463667739e-14, 5.394602127338325e-14, 5.671189081395264e-14, 5.961956935355655e-14, 6.267632758222037e-14, 6.588980896521939e-14, 6.926804885559653e-14, 7.281949458659359e-14, 7.655302659423848e-14, 8.047798062290078e-14, 8.460417106934578e-14, 8.894191552365391e-14, 9.350206056837117e-14, 9.829600890040289e-14, 1.033357478434627e-13, 1.086338793223782e-13, 1.142036513741991e-13, 1.200589912749016e-13, 1.262145403645219e-13, 1.326856906577954e-13, 1.394886233318513e-13, 1.466403491871924e-13, 1.541587511831371e-13, 1.620626291540808e-13, 1.703717468183838e-13, 1.791068811974372e-13, 1.882898745684701e-13, 1.979436890810092e-13, 2.080924641735555e-13, 2.187615769340403e-13, 2.29977705555e-13, 2.417688960421282e-13, 2.541646323430075e-13, 2.671959100713802e-13, 
2.808953140112893e-13, 2.952970995949008e-13, 3.104372785577243e-13, 3.263537089854151e-13, 3.430861899773167e-13, 3.606765611634282e-13, 3.791688073236572e-13, 3.986091683709315e-13, 4.190462549731806e-13, 4.405311701033051e-13, 4.631176368210259e-13, 4.868621326061684e-13, 5.118240305792314e-13, 5.380657479623771e-13, 5.65652902152049e-13, 5.946544747934489e-13, 6.25142984267153e-13, 6.571946670191406e-13, 6.90889668187626e-13, 7.263122420033632e-13, 7.635509624644631e-13, 8.026989448125385e-13, 8.438540783639297e-13, 8.871192712781771e-13, 9.32602707875781e-13, 9.80418119148598e-13, 1.030685067139308e-12, 1.083529243900961e-12, 1.13908278578414e-12, 1.197484603837579e-12, 1.258880731148311e-12, 1.323424687989899e-12, 1.391277865691685e-12, 1.462609930188894e-12, 1.53759924626263e-12, 1.616433323530436e-12, 1.699309285302653e-12, 1.786434361476779e-12, 1.878026406702293e-12, 1.974314445111511e-12, 2.075539242978421e-12, 2.18195391073741e-12, 2.293824535867085e-12, 2.411430848221542e-12, 2.535066919472733e-12, 2.665041898412556e-12, 2.801680783953339e-12, 2.94532523775931e-12, 3.096334438540882e-12, 3.255085980147777e-12, 3.421976815706332e-12, 3.597424250161742e-12, 3.781866983706723e-12, 3.975766208705441e-12, 4.179606762855386e-12, 4.393898341470128e-12, 4.61917677191423e-12, 4.856005353376429e-12, 5.104976265330947e-12, 5.366712048208375e-12, 5.641867159977866e-12, 5.93112961253274e-12, 6.235222691970494e-12, 6.554906767068222e-12, 6.890981190475151e-12, 7.244286297375082e-12, 7.615705506616297e-12, 8.006167529561494e-12, 8.416648692180378e-12, 8.848175376190594e-12, 9.301826585349306e-12, 9.778736643312778e-12, 1.028009802980717e-11, 1.080716436220245e-11, 1.13612535299431e-11, 1.194375098967119e-11, 1.255611322928128e-11, 1.319987140956545e-11, 1.38766351925541e-11, 1.458809676612277e-11, 1.533603507492582e-11, 1.612232026823598e-11, 1.694891837580655e-11, 1.781789622344794e-11, 1.873142660060622e-11, 1.969179369286106e-11, 2.070139879292652e-11, 
2.176276630442797e-11, 2.287855005346671e-11, 2.405153992374879e-11, 2.528466883186401e-11, 2.658102006015479e-11, 2.79438349655016e-11, 2.937652108329839e-11, 3.088266064687495e-11, 3.246601954366069e-11, 3.413055673048201e-11, 3.588043413152314e-11, 3.77200270436965e-11, 3.965393507543111e-11, 4.168699364621922e-11, 4.382428607567159e-11, 4.607115629229177e-11, 4.843322219374122e-11, 5.091638969198842e-11, 5.352686747844393e-11, 5.627118254599528e-11, 5.91561965081079e-11, 6.218912274382077e-11, 6.537754446178264e-11, 6.872943359547854e-11, 7.225317081640647e-11, 7.595756641098418e-11, 7.985188236355692e-11, 8.394585545186541e-11, 8.824972164957216e-11, 9.277424167139106e-11, 9.753072789823236e-11, 1.025310726314839e-10, 1.077877778660112e-10, 1.133139864834988e-10, 1.191235151646173e-10, 1.252308888893655e-10, 1.316513772691233e-10, 1.384010326985858e-10, 1.454967304968246e-10, 1.529562110992108e-10, 1.607981243701625e-10, 1.690420762597015e-10, 1.777086777871889e-10, 1.868195965845065e-10, 1.963976110266845e-10, 2.064666671928123e-10, 2.170519387196237e-10, 2.281798897130802e-10, 2.398783410390334e-10, 2.521765391777756e-10, 2.651052306402691e-10, 2.786967376070333e-10, 2.929850394815942e-10, 3.080058570884391e-10, 3.237967424884018e-10, 3.403971726274513e-10, 3.578486486910359e-10, 3.761947983992077e-10, 3.954814864174652e-10, 4.157569278188865e-10, 4.370718100103904e-10, 4.594794182927322e-10, 4.830357690994454e-10, 5.077997502525357e-10, 5.338332681550556e-10, 5.612014027134468e-10, 5.899725690368701e-10, 6.20218690125984e-10, 6.520153747547637e-10, 6.854421068054313e-10, 7.205824451817688e-10, 7.575242313872259e-10, 7.963598090359794e-10, 8.371862552101683e-10, 8.801056226369179e-10, 9.252251945871828e-10, 9.726577527214043e-10, 1.022521860639612e-09, 1.074942156909893e-09, 1.130049669290197e-09, 1.187982141002392e-09, 1.248884374184311e-09, 1.312908593430052e-09, 1.380214824088789e-09, 1.45097129376827e-09, 1.525354850629307e-09, 1.603551407581154e-09, 
1.68575640418144e-09, 1.772175296647787e-09, 1.863024070417281e-09, 1.958529779349243e-09, 2.058931113536075e-09, 2.164478994211005e-09, 2.275437202036468e-09, 2.392083034707684e-09, 2.514707999782146e-09, 2.643618542933361e-09, 2.779136812387154e-09, 2.921601464650571e-09, 3.071368510354664e-09, 3.22881219827204e-09, 3.394325957139344e-09, 3.568323379008551e-09, 3.751239238893002e-09, 3.943530589910524e-09, 4.14567790977398e-09, 4.358186286055174e-09, 4.581586691508528e-09, 4.816437296331201e-09, 5.063324872018407e-09, 5.322866249211047e-09, 5.595709857709099e-09, 5.882537349989846e-09, 6.184065298932912e-09, 6.501046983080617e-09, 6.83427427126799e-09, 7.184579590933378e-09, 7.552838023685167e-09, 7.939969454621512e-09, 8.346940913907316e-09, 8.774768936091738e-09, 9.224522144574892e-09, 9.697323881109028e-09, 1.019435503301831e-08, 1.071685695565025e-08, 1.126613458873858e-08, 1.184355970105477e-08, 1.245057430144878e-08, 1.308869425816163e-08, 1.375951304804384e-08, 1.446470576363316e-08, 1.520603325297644e-08, 1.598534654016597e-08, 1.680459141418685e-08, 1.76658132843081e-08, 1.857116229664608e-08, 1.952289866219794e-08, 2.052339831033908e-08, 2.157515879667763e-08, 2.268080552165344e-08, 2.384309828767613e-08, 2.506493815360572e-08, 2.634937466306813e-08, 2.769961344773425e-08, 2.911902419623988e-08, 3.061114905254715e-08, 3.217971142995954e-08, 3.382862527264704e-08, 3.556200480852427e-08, 3.738417476994304e-08, 3.92996811713525e-08, 4.131330260872295e-08, 4.343006214359401e-08, 4.565523980085777e-08, 4.79943857045398e-08, 5.045333385764687e-08, 5.303821666125748e-08, 5.575548016215505e-08, 5.861190005916306e-08, 6.161459854400877e-08, 6.477106200370534e-08, 6.808915960019543e-08, 7.157716283897144e-08, 7.524376608796615e-08, 7.90981081696405e-08, 8.314979505368849e-08, 8.740892369454277e-08, 9.18861070776621e-08, 9.659250055337526e-08, 1.015398295194972e-07, 1.067404184800256e-07, 1.122072216169902e-07, 1.179538548879342e-07, 1.239946297946174e-07, 
1.303445888027895e-07, 1.370195426460284e-07, 1.440361094654689e-07, 1.514117559413965e-07, 1.591648405569214e-07, 1.67314659004381e-07, 1.758814919362966e-07, 1.848866551307941e-07, 1.943525521833608e-07, 2.043027299056199e-07, 2.147619364939126e-07, 2.257561827080217e-07, 2.373128060472254e-07, 2.494605383162185e-07, 2.622295764867525e-07, 2.756516572343685e-07, 2.89760135201013e-07, 3.045900652382072e-07, 3.201782888123854e-07, 3.365635247681924e-07, 3.537864647033668e-07, 3.718898731321766e-07, 3.909186927371253e-07, 4.109201549127435e-07, 4.319438959383908e-07, 4.540420789299275e-07, 4.772695220317053e-07, 5.016838330197951e-07, 5.273455506821809e-07, 5.543182933769286e-07, 5.82668915008615e-07, 6.124676689011084e-07, 6.437883798821282e-07, 6.767086250340819e-07, 7.11309923534721e-07, 7.476779360341917e-07, 7.85902674017978e-07, 8.260787197117927e-07, 8.683054569839516e-07, 9.126873138204925e-07, 9.59334016992909e-07, 1.008360859395227e-06, 1.059888980821337e-06, 1.114045662752248e-06, 1.170964637879873e-06, 1.230786415068734e-06, 1.293658620556092e-06, 1.35973635613281e-06, 1.42918257520277e-06, 1.502168477537235e-06, 1.578873923674077e-06, 1.659487869917948e-06, 1.744208824939169e-06, 1.833245329024741e-06, 1.926816457114766e-06, 2.025152346700825e-06, 2.128494751924747e-06, 2.237097624996729e-06, 2.351227726390031e-06, 2.47116526510211e-06, 2.597204570508233e-06, 2.729654797275517e-06, 2.868840664964461e-06, 3.015103233942272e-06, 3.168800719427784e-06, 3.330309345353114e-06, 3.500024240133859e-06, 3.678360376130408e-06, 3.86575355508414e-06, 4.062661441543911e-06, 4.269564646636338e-06, 4.486967864573318e-06, 4.715401064301364e-06, 4.955420738993704e-06, 5.207611215971928e-06, 5.47258603001022e-06, 5.750989362863409e-06, 6.043497552193299e-06, 6.35082067303536e-06, 6.673704195213755e-06, 7.012930720168127e-06, 7.369321800882426e-06, 7.74373984865601e-06, 8.13709013075345e-06, 8.550322863003395e-06, 8.984435401690793e-06, 9.440474539204079e-06, 
9.919538908104081e-06, 1.04227814984686e-05, 1.095141229356336e-05, 1.150670102909202e-05, 1.20899800814903e-05, 1.270264749091665e-05, 1.334617012487741e-05, 1.402208698855056e-05, 1.473201268820975e-05, 1.547764105431164e-05, 1.626074893110043e-05, 1.708320013980627e-05, 1.794694962275941e-05, 1.885404777608841e-05, 1.980664497875904e-05, 2.080699632621051e-05, 2.185746657690638e-05, 2.2960535320548e-05, 2.411880237687469e-05, 2.533499343435209e-05, 2.661196593823887e-05, 2.795271523789707e-05, 2.936038100339243e-05, 3.083825392190845e-05, 3.238978268448752e-05, 3.401858127417495e-05, 3.572843656679728e-05, 3.752331625580306e-05, 3.940737711302575e-05, 4.13849735973322e-05, 4.346066682342408e-05, 4.563923390330997e-05, 4.792567767312314e-05, 5.032523681816502e-05, 5.284339640928596e-05, 5.548589886372889e-05, 5.825875534386136e-05, 6.11682576070953e-05, 6.422099032054428e-05, 6.742384385380723e-05, 7.078402756337258e-05, 7.430908358190839e-05, 7.800690112570318e-05, 8.188573133318525e-05, 8.595420264726543e-05, 9.022133675385858e-05, 9.469656508850946e-05, 9.938974592252135e-05, 0.0001043111820393754, 0.0001094716390115041, 0.0001148823640865816, 0.000120555105691569, 0.0001265021335616435, 0.0001327362594997917, 0.0001392708587714986, 0.0001461198921372869, 0.0001532979285241491, 0.000160820168334739, 0.0001687024673911685, 0.0001769613615076092, 0.0001856140916832426, 0.0001946786299041397, 0.000204173705539255, 0.0002141188323121992, 0.0002245343358264645, 0.0002354413816174784, 0.0002468620037002045, 0.0002588191335758862, 0.0002713366296560342, 0.0002844393070558277, 0.0002981529677025748, 0.000312504430698035, 0.0003275215628658266, 0.0003432333094072306, 0.0003596697245799178, 0.0003768620023050985, 0.0003948425065985835, 0.0004136448017107672, 0.0004333036818493705, 0.0004538552003467858, 0.0004753366981213076, 0.0004977868312680412, 0.0005212455976011704, 0.0005457543619542778, 0.0005713558800296986, 0.0005980943205712942, 0.0006260152856177383, 
0.0006551658285752462, 0.0006855944698297639, 0.0007173512095989201, 0.0007504875377037188, 0.0007850564399187515, 0.0008211124005380118, 0.0008587114007711333, 0.0008979109125618837, 0.0009387698873978118, 0.000981348739656158, 0.001025709324007708, 0.001071914906376532, 0.001120030127930075, 0.001170120961551084, 0.001222254660220335, 0.001276499696717622, 0.001332925694028123, 0.001391603345822392, 0.001452604326361381, 0.001516001189163269, 0.001581867253757025, 0.001650276479839193, 0.001721303328145603, 0.001795022607349563, 0.001871509306302876, 0.001950838410946703, 0.002033084705236545, 0.002118322555450132, 0.002206625677279963, 0.002298066885154144, 0.00239271782328128, 0.002490648677978496, 0.002591927870916839, 0.00269662173300719, 0.002804794158752478, 0.002916506241010382, 0.003031815886245572, 0.003150777410502592, 0.00327344111650167, 0.003399852852449649, 0.003530053553369027, 0.00366407876597947, 0.003801958158418979, 0.003943715016366723, 0.004089365727425568, 0.004238919255940107, 0.004392376610764206, 0.004549730308849366, 0.004710963837900846, 0.004876051121738272, 0.005044955992400175, 0.005217631673442345, 0.005394020279293294, 0.005574052335941617, 0.005757646328630731, 0.005944708282619989, 0.006135131383426333, 0.006328795643276962, 0.006525567620768418, 0.006725300200925559, 0.006927832442970083, 0.007132989503123939, 0.007340582639667846, 0.007550409307228858, 0.007762253346858158, 0.007975885277857444, 0.0081910626964919, 0.008407530785661655, 0.008625022938264285, 0.008843261495336884, 0.00906195859809064, 0.009280817150613498, 0.009499531887291746, 0.009717790535865207, 0.009935275063462435, 0.01015166298894839, 0.01036662874044923, 0.0105798450319984, 0.01079098422788814, 0.01099971965753541, 0.01120572683752308, 0.01140868455101617, 0.01160827572805864, 0.01180418806343696, 0.0119961143019824, 0.01218375211454012, 0.01236680348155585, 0.01254497349555179, 0.01271796848895421, 0.01288549339010526, 0.0130472482081918, 
0.01320292354764521, 0.01335219505473747, 0.01349471670408587, 0.01363011284107469, 0.01375796890832769, 0.01387782080084797, 0.01398914281581518, 0.01409133418980647, 0.01418370424886634, 0.01426545623580948, 0.01433566992473324, 0.01439338676009138, 0.01443796253918846, 0.01446885437510783, 0.0144855343107321, 0.01448749249154847, 0.0144742404965952, 0.01444531482077133, 0.0144002804997924, 0.0143387348669685, 0.01426031142871785, 0.01416468384330507, 0.01405156998469585, 0.01392073607069443, 0.01377200083163599, 0.01360523969289028, 0.01342038894131095, 0.013217449842514, 0.01299649267258092, 0.01275766062441454, 0.01250117354560537, 0.01222733146133242, 0.01193651783252089, 0.01162920249633459, 0.01130594423307878, 0.01096739290082778, 0.01061429107666001, 0.01024747514128668, 0.009867875742286716, 0.009476517570100373, 0.00907451838055362, 0.008663087198083744, 0.008243521635076537, 0.007817204265018418, 0.007385597990544723, 0.006950240352122744, 0.006512736729177349, 0.006074752393026822, 0.005638003380278617, 0.005204246166382515, 0.004775266132021477, 0.00435286483007441, 0.003938846078029912, 0.003535000920137449, 0.00314309152521014, 0.002764834109884873, 0.002401881003280454, 0.00205580199719449, 0.00172806515619453, 0.001420017293849996, 0.001132864354675384, 0.000867651975734185, 0.000625246536737345, 0.0004063170424460916, 0.00021131821551712, 4.047521101488427e-05, -0.0001062296050816924, -0.0002290673427493284, -0.0003285702447563821, -0.0004055357976942009, -0.0004610267662981116, -0.0004963666111035547, -0.0005131297474727792, -0.0005131261099245166, -0.0004983795006077707, -0.000471099225711813, -0.0004336445598795979, -0.0003884816276605862, -0.0003381323542647197, -0.0002851152171476634, -0.0002318776272919876, -0.0001807198866849435, -0.0001337108090132544, -9.259525670568536e-05, -5.869404225083974e-05, -3.279686834498086e-05, -1.504924324233214e-05, -4.834608033965626e-06, -6.532546449611321e-07, -5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 2 + }, + { + "i": 2, + "radial_function": [ + 9.820489479254455e-10, 1.006909731274244e-09, 1.032399873265267e-09, 1.058535306440413e-09, 1.085332366688723e-09, 1.112807803457823e-09, 1.140978790223898e-09, 1.16986293522677e-09, 1.199478292476753e-09, 1.229843373040202e-09, 1.260977156610794e-09, 1.292899103373762e-09, 1.325629166170564e-09, 1.359187802971496e-09, 1.393595989664153e-09, 1.428875233165646e-09, 1.465047584866822e-09, 
1.5021356544169e-09, 1.54016262385711e-09, 1.579152262112221e-09, 1.619128939848934e-09, 1.660117644710555e-09, 1.702143996937372e-09, 1.745234265382548e-09, 1.789415383933574e-09, 1.834714968349444e-09, 1.881161333524257e-09, 1.928783511187876e-09, 1.977611268054787e-09, 2.027675124432574e-09, 2.079006373301455e-09, 2.131637099877096e-09, 2.185600201668634e-09, 2.240929409044716e-09, 2.297659306320236e-09, 2.355825353377076e-09, 2.415463907832296e-09, 2.476612247767684e-09, 2.539308595034901e-09, 2.603592139150758e-09, 2.669503061797558e-09, 2.737082561943918e-09, 2.806372881601689e-09, 2.877417332235184e-09, 2.950260321839132e-09, 3.024947382702338e-09, 3.101525199874534e-09, 3.180041640354018e-09, 3.260545783014558e-09, 3.343087949290107e-09, 3.427719734636646e-09, 3.514494040790865e-09, 3.603465108845699e-09, 3.694688553163598e-09, 3.788221396148793e-09, 3.884122103899963e-09, 3.982450622766104e-09, 4.08326841682816e-09, 4.18663850632996e-09, 4.292625507082501e-09, 4.401295670866333e-09, 4.51271692685717e-09, 4.62695892410084e-09, 4.744093075064019e-09, 4.864192600288145e-09, 4.987332574174283e-09, 5.113589971927856e-09, 5.243043717692406e-09, 5.375774733902676e-09, 5.511865991887837e-09, 5.651402563756573e-09, 5.794471675596574e-09, 5.941162762021573e-09, 6.091567522100384e-09, 6.245779976702674e-09, 6.403896527297598e-09, 6.566016016242003e-09, 6.73223978859604e-09, 6.90267175550489e-09, 7.077418459186357e-09, 7.256589139564862e-09, 7.440295802593923e-09, 7.628653290309578e-09, 7.821779352658898e-09, 8.01979472114859e-09, 8.22282318435956e-09, 8.430991665375245e-09, 8.644430301171884e-09, 8.863272524020565e-09, 9.087655144952344e-09, 9.317718439338171e-09, 9.553606234638068e-09, 9.795466000373987e-09, 1.004344894038293e-08, 1.029771008740845e-08, 1.055840840008934e-08, 1.082570686240702e-08, 1.109977258565334e-08, 1.13807769129832e-08, 1.166889552661772e-08, 1.19643085577645e-08, 1.226720069932498e-08, 1.257776132145883e-08, 1.289618459007814e-08, 
1.322266958834622e-08, 1.355742044125653e-08, 1.390064644337068e-08, 1.425256218979524e-08, 1.461338771048002e-08, 1.498334860792183e-08, 1.536267619836033e-08, 1.57516076565549e-08, 1.615038616423333e-08, 1.655926106230556e-08, 1.697848800693834e-08, 1.740832912958883e-08, 1.784905320109763e-08, 1.830093579994497e-08, 1.876425948477496e-08, 1.923931397129753e-08, 1.97263963136784e-08, 2.022581109053171e-08, 2.073787059563255e-08, 2.126289503346898e-08, 2.180121271975659e-08, 2.2353160287043e-08, 2.291908289552982e-08, 2.3499334449247e-08, 2.409427781771426e-08, 2.470428506322997e-08, 2.532973767393052e-08, 2.597102680276744e-08, 2.662855351255241e-08, 2.730272902722539e-08, 2.799397498950399e-08, 2.870272372507634e-08, 2.942941851350537e-08, 3.017451386601422e-08, 3.09384758103289e-08, 3.172178218275849e-08, 3.252492292769591e-08, 3.334840040473097e-08, 3.419272970356723e-08, 3.505843896694414e-08, 3.594606972176699e-08, 3.685617721865504e-08, 3.77893307801226e-08, 3.874611415761376e-08, 3.972712589761639e-08, 4.073297971708771e-08, 4.176430488842894e-08, 4.282174663425438e-08, 4.390596653220333e-08, 4.501764293005311e-08, 4.615747137139685e-08, 4.732616503215483e-08, 4.852445516819786e-08, 4.975309157436735e-08, 5.101284305518289e-08, 5.230449790753703e-08, 5.362886441568521e-08, 5.498677135884437e-08, 5.637906853172506e-08, 5.780662727832765e-08, 5.927034103934396e-08, 6.077112591351042e-08, 6.230992123327737e-08, 6.388769015515328e-08, 6.550542026510815e-08, 6.716412419942073e-08, 6.886484028136369e-08, 7.060863317413968e-08, 7.239659455048017e-08, 7.422984377934147e-08, 7.610952863013373e-08, 7.803682599493644e-08, 8.001294262916614e-08, 8.203911591116595e-08, 8.411661462121043e-08, 8.624673974042715e-08, 8.843082527014137e-08, 9.06702390721834e-08, 9.296638373068755e-08, 9.53206974359485e-08, 9.773465489090124e-08, 1.002097682408068e-07, 1.027475880267526e-07, 1.053497041635776e-07, 1.08017746942858e-07, 1.107533880616034e-07, 1.135583416773306e-07, 
1.164343654902005e-07, 1.193832618529192e-07, 1.224068789091274e-07, 1.255071117610206e-07, 1.286859036669545e-07, 1.319452472698275e-07, 1.352871858570299e-07, 1.387138146527959e-07, 1.422272821437957e-07, 1.458297914388369e-07, 1.495236016635758e-07, 1.533110293911395e-07, 1.571944501096137e-07, 1.611762997273562e-07, 1.652590761171243e-07, 1.694453407000463e-07, 1.737377200704706e-07, 1.781389076627769e-07, 1.826516654612545e-07, 1.872788257541721e-07, 1.920232929332201e-07, 1.968880453395078e-07, 2.018761371573568e-07, 2.069907003571513e-07, 2.12234946688539e-07, 2.176121697253316e-07, 2.231257469634555e-07, 2.287791419733848e-07, 2.345759066084898e-07, 2.405196832707916e-07, 2.466142072356656e-07, 2.528633090370502e-07, 2.592709169147855e-07, 2.658410593257531e-07, 2.725778675205028e-07, 2.794855781871542e-07, 2.865685361643516e-07, 2.93831197225145e-07, 3.012781309337115e-07, 3.089140235768599e-07, 3.167436811723742e-07, 3.247720325562338e-07, 3.330041325508816e-07, 3.414451652167257e-07, 3.501004471891232e-07, 3.589754311032078e-07, 3.680757091089177e-07, 3.774070164787163e-07, 3.869752353105267e-07, 3.96786398328489e-07, 4.068466927842434e-07, 4.171624644614772e-07, 4.277402217866042e-07, 4.385866400484952e-07, 4.497085657302719e-07, 4.611130209562912e-07, 4.72807208057481e-07, 4.847985142583644e-07, 4.970945164891292e-07, 5.097029863262553e-07, 5.22631895065293e-07, 5.358894189294993e-07, 5.494839444181592e-07, 5.634240737985183e-07, 5.77718630745395e-07, 5.923766661326407e-07, 6.074074639807651e-07, 6.228205475651725e-07, 6.386256856895763e-07, 6.548328991293266e-07, 6.714524672495146e-07, 6.884949348028696e-07, 7.059711189126407e-07, 7.238921162457852e-07, 7.422693103820086e-07, 7.611143793843047e-07, 7.804393035768779e-07, 8.002563735365177e-07, 8.205781983036566e-07, 8.414177138195576e-07, 8.627881915963195e-07, 8.84703247626534e-07, 9.071768515397499e-07, 9.302233360130068e-07, 9.538574064430806e-07, 9.780941508882353e-07, 1.002949050287533e-06, 
1.028437988966123e-06, 1.054577265435089e-06, 1.081383603494776e-06, 1.10887416365086e-06, 1.137066554852642e-06, 1.165978846563464e-06, 1.195629581173425e-06, 1.226037786764918e-06, 1.257222990241957e-06, 1.289205230834495e-06, 1.32200507398945e-06, 1.355643625660567e-06, 1.390142547009486e-06, 1.425524069531113e-06, 1.461811010616587e-06, 1.499026789567731e-06, 1.537195444077442e-06, 1.576341647190743e-06, 1.616490724762086e-06, 1.657668673424727e-06, 1.699902179088823e-06, 1.743218635985363e-06, 1.787646166273684e-06, 1.833213640231078e-06, 1.879950697043465e-06, 1.927887766217088e-06, 1.977056089631709e-06, 2.027487744256558e-06, 2.079215665551289e-06, 2.132273671574736e-06, 2.186696487825374e-06, 2.242519772838169e-06, 2.299780144563424e-06, 2.358515207554315e-06, 2.418763580990708e-06, 2.480564927568015e-06, 2.543959983280892e-06, 2.608990588132725e-06, 2.67569971780317e-06, 2.744131516307129e-06, 2.814331329679912e-06, 2.88634574072482e-06, 2.960222604860509e-06, 3.036011087107434e-06, 3.11376170025373e-06, 3.193526344242937e-06, 3.275358346827419e-06, 3.35931250553318e-06, 3.445445130983626e-06, 3.533814091631693e-06, 3.624478859951933e-06, 3.717500560146026e-06, 3.812942017417596e-06, 3.910867808874331e-06, 4.01134431611795e-06, 4.114439779584921e-06, 4.220224354703634e-06, 4.328770169936238e-06, 4.440151386776349e-06, 4.554444261776939e-06, 4.671727210685412e-06, 4.79208087476675e-06, 4.915588189398372e-06, 5.042334455024406e-06, 5.172407410560577e-06, 5.305897309344806e-06, 5.442896997732711e-06, 5.583501996441583e-06, 5.727810584750488e-06, 5.875923887669365e-06, 6.027945966194244e-06, 6.183983910771317e-06, 6.344147938097596e-06, 6.508551491391475e-06, 6.677311344272455e-06, 6.850547708395577e-06, 7.028384344991734e-06, 7.21094868047276e-06, 7.398371926265948e-06, 7.590789203051309e-06, 7.788339669581245e-06, 7.991166656271034e-06, 8.199417803756697e-06, 8.413245206625365e-06, 8.632805562532842e-06, 8.85826032693209e-06, 9.089775873646655e-06, 
9.327523661533641e-06, 9.571680407491402e-06, 9.822428266079015e-06, 1.007995501602621e-05, 1.034445425392529e-05, 1.061612559540948e-05, 1.089517488413596e-05, 1.118181440890618e-05, 1.147626312927121e-05, 1.177874690998542e-05, 1.208949876468852e-05, 1.240875910921245e-05, 1.273677602492925e-05, 1.307380553257271e-05, 1.342011187698834e-05, 1.377596782328551e-05, 1.414165496488732e-05, 1.451746404399793e-05, 1.490369528502751e-05, 1.530065874154324e-05, 1.570867465733827e-05, 1.6128073842238e-05, 1.655919806329205e-05, 1.70024004520291e-05, 1.745804592848316e-05, 1.792651164273154e-05, 1.840818743471958e-05, 1.890347631318076e-05, 1.941279495450062e-05, 1.993657422240803e-05, 2.047525970942049e-05, 2.102931230101125e-05, 2.159920876350932e-05, 2.218544235679138e-05, 2.278852347287067e-05, 2.340898030153994e-05, 2.404735952427668e-05, 2.470422703767444e-05, 2.538016870772118e-05, 2.607579115630569e-05, 2.679172258139422e-05, 2.752861361238708e-05, 2.828713820223007e-05, 2.906799455792909e-05, 2.987190611118887e-05, 3.069962253097504e-05, 3.155192077987759e-05, 3.242960621624068e-05, 3.333351374410836e-05, 3.426450901312949e-05, 3.522348967065928e-05, 3.621138666839363e-05, 3.722916562597714e-05, 3.827782825413198e-05, 3.935841383996867e-05, 4.047200079725433e-05, 4.161970828453619e-05, 4.280269789414693e-05, 4.402217541524473e-05, 4.527939267418363e-05, 4.657564945564633e-05, 4.791229550812431e-05, 4.929073263747781e-05, 5.071241689247399e-05, 5.217886084636055e-05, 5.369163597870934e-05, 5.525237516193742e-05, 5.686277525710085e-05, 5.852459982374625e-05, 6.023968194880486e-05, 6.20099271997144e-05, 6.383731670717247e-05, 6.572391038313835e-05, 6.767185027993362e-05, 6.96833640965167e-05, 7.176076883826006e-05, 7.390647463679746e-05, 7.612298873677106e-05, 7.841291965657504e-05, 8.077898153045841e-05, 8.322399863963665e-05, 8.575091014034639e-05, 8.836277499707249e-05, 9.106277712948372e-05, 9.385423078192192e-05, 9.674058612460344e-05, 9.972543509602736e-05, 
0.0001028125174964017, 0.0001060057273422526, 0.0001093091194927137, 0.0001127269165583465, 0.0001162635161037013, 0.0001199234981551769, 0.0001237116330261129, 0.0001276328894714027, 0.0001316924431842881, 0.0001358956856483658, 0.0001402482333581957, 0.0001447559374222829, 0.0001494248935625499, 0.0001542614525247919, 0.0001592722309149479, 0.0001644641224763709, 0.0001698443098236165, 0.0001754202766485828, 0.0001811998204151531, 0.0001871910655587661, 0.0001934024772076193, 0.0001998428754424457, 0.0002065214501120223, 0.0002134477762217515, 0.0002206318299128017, 0.0002280840050493978, 0.0002358151304319216, 0.0002438364876534749, 0.0002521598296175249, 0.0002607973997341331, 0.0002697619518120763, 0.0002790667706639387, 0.00028872569344087, 0.0002987531317132974, 0.0003091640943133251, 0.000319974210953892, 0.0003311997566390115, 0.0003428576768784649, 0.0003549656137193108, 0.0003675419326053281, 0.0003806057500741265, 0.0003941769623001254, 0.0004082762744897901, 0.0004229252311335562, 0.000438146247116668, 0.0004539626396886459, 0.0004703986612884384, 0.0004874795332192327, 0.0005052314801635933, 0.0005236817655259742, 0.0005428587275855746, 0.0005627918164382165, 0.0005835116317010795, 0.0006050499609489548, 0.0006274398188450409, 0.0006507154869231568, 0.0006749125539716499, 0.0007000679569621061, 0.0007262200224582471, 0.0007534085084321213, 0.0007816746464057279, 0.0008110611838267109, 0.0008416124265764317, 0.0008733742814978542, 0.0009063942988189143, 0.0009407217143346329, 0.0009764074911979706, 0.001013504361155347, 0.001052066865047847, 0.001092151392383347, 0.001133816219768125, 0.001177121547968983, 0.00122212953735841, 0.001268904341475871, 0.001317512138418092, 0.001368021159749777, 0.001420501716604232, 0.001475026222620124, 0.001531669213336784, 0.001590507361645629, 0.001651619488869745, 0.00171508657101737, 0.001780991739728062, 0.001849420277402872, 0.001920459605981779, 0.001994199268803342, 0.002070730904953135, 0.002150148215478732, 
0.002232546920820858, 0.002318024708782135, 0.002406681172327428, 0.002498617736483394, 0.002593937573579315, 0.002692745506047498, 0.002795147895979663, 0.002901252520615841, 0.003011168432925701, 0.003125005806428215, 0.003242875763385828, 0.003364890185503644, 0.003491161506263316, 0.003621802484026583, 0.003756925955054239, 0.003896644565604948, 0.004041070482304205, 0.0041903150800086, 0.004344488606434944, 0.004503699822878435, 0.004668055620410099, 0.004837660611022299, 0.005012616693282262, 0.00519302259215986, 0.005378973372816316, 0.005570559928277466, 0.005767868441069179, 0.005970979819063466, 0.006179969105974225, 0.006394904867149742, 0.006615848551537547, 0.006842853830945153, 0.007075965917987459, 0.007315220864399377, 0.007560644841697946, 0.007812253406503102, 0.008070050753167768, 0.008334028956724462, 0.008604167209526366, 0.008880431055340395, 0.009162771625037727, 0.009451124878417259, 0.009745410857085293, 0.01004553295369569, 0.01035137720321986, 0.01066281160225994, 0.01097968546273159, 0.01130182880651422, 0.01162905180788855, 0.01196114429073701, 0.01229787528756391, 0.01263899266738079, 0.0129842228393842, 0.01333327053911296, 0.01368581870338845, 0.01404152843980093, 0.01440003909578493, 0.0147609684314089, 0.01512391289887175, 0.01548844803032831, 0.01585412893404349, 0.01622049089698293, 0.01658705008977041, 0.01695330436747263, 0.017318734156897, 0.01768280341801139, 0.018044960663716, 0.01840464001853061, 0.01876126229282514, 0.01911423604504766, 0.01946295860003566, 0.01980681698699057, 0.02014518875612182, 0.02047744262842019, 0.02080293892860275, 0.02112102974712069, 0.02143105877338355, 0.02173236073920815, 0.02202426040914938, 0.02230607105303671, 0.02257709233598101, 0.02283660756260387, 0.02308388021557721, 0.02331814973406388, 0.02353862648565354, 0.02374448589623899, 0.02393486171632137, 0.02410883841980984, 0.02426544275281246, 0.02440363447548624, 0.0245222963699653, 0.02462022362188074, 0.02469619870621116, 
0.02474928617385816, 0.02477865514424593, 0.02478350989414503, 0.02476309542130178, 0.02471670321013747, 0.02464367717856114, 0.0245434197811817, 0.02441539824021471, 0.02425915087119654, 0.02407429346624619, 0.02386052569306334, 0.02361763746319967, 0.02334551521836331, 0.02304414807870417, 0.02271363379223196, 0.02235418441974946, 0.02196613168509073, 0.02154993191603391, 0.02110617049714769, 0.02063556575213001, 0.02013897216995272, 0.01961738288654577, 0.01907193133187252, 0.01850389195124905, 0.01791467990979526, 0.01730584969005669, 0.01667909249535166, 0.01603623237535121, 0.01537922099599354, 0.01471013098324647, 0.01403114777954115, 0.01334455996316752, 0.01265274799457056, 0.0119581713695123, 0.01126335417754942, 0.01057086908523541, 0.009883319787009848, 0.009203321992775237, 0.008533483049662033, 0.007876380326310911, 0.007234538520882072, 0.006610406088707471, 0.006006331021547379, 0.005424536247328055, 0.004867094956375134, 0.004335906196709344, 0.003832671116100611, 0.003358870261188918, 0.002915742372948615, 0.002504265141815537, 0.002125138403524629, 0.0017787702666823, 0.001465266663820908, 0.001184424807646134, 0.0009357310119201545, 0.0007183633005151402, 0.0005311991773237768, 0.0003728288627935228, 0.0002415742189002812, 0.0001355134826467874, 5.251180823834245e-05, -9.7425202870427e-06, -5.369650081774036e-05, -8.188591851501781e-05, -9.688460515688835e-05, -0.0001012503194429692, -9.746833532456312e-05, -8.789405637771308e-05, -7.46962128506945e-05, -5.980244023162662e-05, -4.484927886408419e-05, -3.113886739579408e-05, -1.960482123725127e-05, -1.078998224465777e-05, -4.838887449121199e-06, -1.507921012861013e-06, -1.961714501674816e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 3 + }, + { + "i": 2, + "radial_function": [ + 4.303131968466807e-15, 4.523758226724812e-15, 4.755696232359118e-15, 4.999525950843584e-15, 5.255857083100984e-15, 5.525330590070686e-15, 5.808620295442605e-15, 6.106434570564963e-15, 6.419518105738866e-15, 6.748653772329238e-15, 7.09466458034774e-15, 7.458415736403233e-15, 7.84081680716534e-15, 8.242823993750997e-15, 8.665442522721226e-15, 9.109729159666447e-15, 9.576794851666169e-15, 1.006780750523006e-14, 1.058399490666689e-14, 1.112664779218387e-14, 1.169712307539273e-14, 1.229684724029356e-14, 1.292731990822018e-14, 1.359011758766639e-14, 1.428689761636957e-14, 1.501940230550859e-14, 1.57894632963791e-14, 1.659900614043971e-14, 1.745005511418159e-14, 1.834473828086142e-14, 
1.928529281175378e-14, 2.027407058022986e-14, 2.131354404264972e-14, 2.240631242077355e-14, 2.355510820115147e-14, 2.476280396774263e-14, 2.603241958484968e-14, 2.736712974832914e-14, 2.877027192395912e-14, 3.024535469281528e-14, 3.179606652452116e-14, 3.342628500031203e-14, 3.514008650897342e-14, 3.694175643989957e-14, 3.883579989875978e-14, 4.082695297256558e-14, 4.292019457230959e-14, 4.51207588827865e-14, 4.743414845072782e-14, 4.986614794397743e-14, 5.242283861611127e-14, 5.511061351267237e-14, 5.793619345704208e-14, 6.09066438559209e-14, 6.402939236644262e-14, 6.73122474690939e-14, 7.076341799288577e-14, 7.439153364159675e-14, 7.820566657241335e-14, 8.221535408092733e-14, 8.64306224492091e-14, 9.086201201659411e-14, 9.552060353586794e-14, 1.004180458807548e-13, 1.055665851739923e-13, 1.109790954088239e-13, 1.166691106404818e-13, 1.226508588281515e-13, 1.2893929741204e-13, 1.355501507145103e-13, 1.42499949258796e-13, 1.498060711036203e-13, 1.574867852970706e-13, 1.655612975583834e-13, 1.740497983018727e-13, 1.829735131230728e-13, 1.923547558733471e-13, 2.02216984455667e-13, 2.125848594810795e-13, 2.234843059325334e-13, 2.349425779902456e-13, 2.469883271807145e-13, 2.596516740197715e-13, 2.729642833288201e-13, 2.869594434125874e-13, 3.016721492963611e-13, 3.171391902308575e-13, 3.333992416835102e-13, 3.504929620462073e-13, 3.684630943012925e-13, 3.873545729000266e-13, 4.072146361207777e-13, 4.28092944187868e-13, 4.500417034464332e-13, 4.731157969037951e-13, 4.973729214637394e-13, 5.228737321968718e-13, 5.496819940077711e-13, 5.778647410781844e-13, 6.074924444849486e-13, 6.386391884117282e-13, 6.713828553952182e-13, 7.058053210689732e-13, 7.41992658891818e-13, 7.800353553727694e-13, 8.200285363305637e-13, 8.62072204753618e-13, 9.062714908551039e-13, 9.527369149484183e-13, 1.00158466380034e-12, 1.052936881152847e-12, 1.106921973140052e-12, 1.163674929363919e-12, 1.223337660431543e-12, 1.286059352798044e-12, 1.351996841802225e-12, 1.421315003827836e-12, 
1.494187168570933e-12, 1.570795552444185e-12, 1.651331714201817e-12, 1.735997033924385e-12, 1.825003216561097e-12, 1.918572821288658e-12, 2.016939818010275e-12, 2.120350172386287e-12, 2.229062460859121e-12, 2.343348517210509e-12, 2.463494112267452e-12, 2.589799668456511e-12, 2.72258101099308e-12, 2.862170157583749e-12, 3.008916148616474e-12, 3.163185919914138e-12, 3.325365220233767e-12, 3.495859575805484e-12, 3.675095304322689e-12, 3.863520580918973e-12, 4.061606558796939e-12, 4.269848547310799e-12, 4.488767250448485e-12, 4.718910068809554e-12, 4.960852468334536e-12, 5.215199419207735e-12, 5.482586908531159e-12, 5.763683530551819e-12, 6.059192158418109e-12, 6.36985170164544e-12, 6.696438953684975e-12, 7.039770534214913e-12, 7.40070493101069e-12, 7.780144646498825e-12, 8.179038454361808e-12, 8.598383771835638e-12, 9.03922915363141e-12, 9.502676913716377e-12, 9.989885881509025e-12, 1.050207429937971e-11, 1.104052286870086e-11, 1.160657795206213e-11, 1.22016549396573e-11, 1.282724178825828e-11, 1.348490274162541e-11, 1.417628224165453e-11, 1.490310904003971e-11, 1.566720052073151e-11, 1.647046724399599e-11, 1.731491772343747e-11, 1.820266344792515e-11, 1.913592416098095e-11, 2.011703341082668e-11, 2.114844438496463e-11, 2.223273604388116e-11, 2.337261956920417e-11, 2.457094514243766e-11, 2.583070907121923e-11, 2.715506128091457e-11, 2.854731319028167e-11, 3.001094599088817e-11, 3.154961935098458e-11, 3.316718056559039e-11, 3.48676741756661e-11, 3.66553520804223e-11, 3.853468416803998e-11, 4.051036949138188e-11, 4.258734801663063e-11, 4.477081297422144e-11, 4.706622384294984e-11, 4.947931999970479e-11, 5.20161350689544e-11, 5.468301200785239e-11, 5.748661896467132e-11, 6.043396595021144e-11, 6.353242236384925e-11, 6.6789735418043e-11, 7.021404950734757e-11, 7.381392657034958e-11, 7.759836749543029e-11, 8.157683462384894e-11, 8.575927540640484e-11, 9.015614727280552e-11, 9.477844377589626e-11, 9.963772207611302e-11, 1.047461318421992e-10, 1.101164455966972e-10, 
1.15762090686059e-10, 1.216971828373423e-10, 1.279365614194502e-10, 1.344958266119781e-10, 1.41391378282161e-10, 1.486404571272767e-10, 1.562611876718498e-10, 1.642726236179098e-10, 1.726947954695236e-10, 1.81548760615426e-10, 1.908566559550693e-10, 2.006417532650738e-10, 2.109285173295399e-10, 2.217426671331787e-10, 2.33111240144394e-10, 2.450626599101589e-10, 2.576268070999621e-10, 2.708350941802386e-10, 2.847205439976262e-10, 2.99317872244851e-10, 3.146635743006214e-10, 3.307960164390137e-10, 3.477555317244947e-10, 3.655845208043741e-10, 3.843275579925322e-10, 4.040315025506173e-10, 4.24745615693416e-10, 4.465216847940665e-10, 4.694141502615158e-10, 4.934802454071732e-10, 5.187801351185532e-10, 5.453770705244291e-10, 5.733375436788606e-10, 6.027314561644753e-10, 6.336322913685127e-10, 6.661173010313948e-10, 7.00267695929725e-10, 7.361688502148018e-10, 7.739105137688045e-10, 8.135870377631653e-10, 8.552976086877217e-10, 8.991464989317819e-10, 9.452433233547094e-10, 9.93703317489905e-10, 1.044647623324663e-09, 1.098203591918777e-09, 1.154505101701741e-09, 1.213692894390856e-09, 1.275914923890827e-09, 1.341326729888863e-09, 1.410091822735745e-09, 1.48238209490449e-09, 1.558378248123777e-09, 1.638270246290851e-09, 1.722257790282949e-09, 1.810550816105341e-09, 1.903370019721319e-09, 2.000947408516337e-09, 2.103526881373913e-09, 2.211364835897196e-09, 2.324730812775918e-09, 2.443908165275804e-09, 2.56919477060693e-09, 2.700903769329846e-09, 2.839364353496779e-09, 2.98492258389161e-09, 3.137942257253736e-09, 3.298805814458253e-09, 3.4679152950475e-09, 3.645693342263278e-09, 3.832584257778596e-09, 4.029055111435931e-09, 4.235596909503982e-09, 4.452725818863184e-09, 4.68098445338896e-09, 4.920943242997812e-09, 5.173201832866935e-09, 5.438390606908928e-09, 5.717172241000715e-09, 6.010243370280193e-09, 6.318336330809816e-09, 6.642220977887191e-09, 6.982706610696085e-09, 7.340643994624821e-09, 7.716927483732463e-09, 8.11249726039189e-09, 8.528341668437109e-09, 
8.96549969435627e-09, 9.425063551016333e-09, 9.908181406211015e-09, 1.041606025319368e-08, 1.094996890199977e-08, 1.151124119760985e-08, 1.210127926315425e-08, 1.272155709958747e-08, 1.337362416551454e-08, 1.405910933293982e-08, 1.477972487451301e-08, 1.553727077466952e-08, 1.633363921978004e-08, 1.71708193175682e-08, 1.805090203515248e-08, 1.897608544528564e-08, 1.994868018069263e-08, 2.097111522110682e-08, 2.204594393682148e-08, 2.317585044674199e-08, 2.43636563190998e-08, 2.561232762576699e-08, 2.692498229609492e-08, 2.830489791906169e-08, 2.975551990324834e-08, 3.128047004580046e-08, 3.28835556068679e-08, 3.45687787386491e-08, 3.634034648129855e-08, 3.820268126704765e-08, 4.01604319079069e-08, 4.221848515255771e-08, 4.438197793147501e-08, 4.665631008055593e-08, 4.904715780850377e-08, 5.156048785383135e-08, 5.420257229580109e-08, 5.698000420645674e-08, 5.989971403073858e-08, 6.296898686446399e-08, 6.61954805421812e-08, 6.958724474546913e-08, 7.31527409759954e-08, 7.690086362783333e-08, 8.084096210436852e-08, 8.498286408621866e-08, 8.933689991906796e-08, 9.391392836408493e-08, 9.872536352888003e-08, 1.037832032723508e-07, 1.091000589844198e-07, 1.146891869727925e-07, 1.205645213326995e-07, 1.267407086067478e-07, 1.332331441195794e-07, 1.400580102315662e-07, 1.472323164874999e-07, 1.547739418445186e-07, 1.627016790365573e-07, 1.710352812078172e-07, 1.797955108892141e-07, 1.890041915213969e-07, 1.986842615309954e-07, 2.08859831217524e-07, 2.195562425089482e-07, 2.308001317331347e-07, 2.426194956440322e-07, 2.550437607122598e-07, 2.681038560144713e-07, 2.818322897991574e-07, 2.962632299367772e-07, 3.114325884506053e-07, 3.273781103372626e-07, 3.441394669287691e-07, 3.617583539438268e-07, 3.802785945459855e-07, 3.997462475952407e-07, 4.20209721440262e-07, 4.417198934203703e-07, 4.643302354546738e-07, 4.880969459865361e-07, 5.130790885990524e-07, 5.393387376597011e-07, 5.669411313356714e-07, 5.959548323245659e-07, 6.264518967444783e-07, 6.585080515376935e-07, 
6.922028808597494e-07, 7.276200218620636e-07, 7.64847370353958e-07, 8.039772968800969e-07, 8.451068736485378e-07, 8.883381129814498e-07, 9.337782176587485e-07, 9.815398440205418e-07, 1.031741378192831e-06, 1.084507226301554e-06, 1.139968119271894e-06, 1.198261432883391e-06, 1.259531524056818e-06, 1.323930083860387e-06, 1.391616508398726e-06, 1.46275828820068e-06, 1.537531417179222e-06, 1.616120822144585e-06, 1.69872081368413e-06, 1.785535559809063e-06, 1.876779583126523e-06, 1.972678282980571e-06, 2.073468483638268e-06, 2.179399009859035e-06, 2.290731291192572e-06, 2.407739996368083e-06, 2.530713699372852e-06, 2.659955578600839e-06, 2.795784150844797e-06, 2.938534041795081e-06, 3.088556794707953e-06, 3.246221719321388e-06, 3.411916782801973e-06, 3.586049544845875e-06, 3.769048139041128e-06, 3.961362302807314e-06, 4.163464458159765e-06, 4.375850845825095e-06, 4.599042715330222e-06, 4.833587573579484e-06, 5.080060494974536e-06, 5.33906549586832e-06, 5.611236976437753e-06, 5.897241233341377e-06, 6.197778046353853e-06, 6.513582342680919e-06, 6.845425942457282e-06, 7.194119389497415e-06, 7.560513871175174e-06, 7.945503231657447e-06, 8.350026082998716e-06, 8.775068018552848e-06, 9.221663933644992e-06, 9.690900458376771e-06, 1.018391850791746e-05, 1.070191595567326e-05, 1.124615043505077e-05, 1.181794227580005e-05, 1.241867758104503e-05, 1.30498114515801e-05, 1.371287136415586e-05, 1.440946071064691e-05, 1.514126250564188e-05, 1.591004326990466e-05, 1.671765709775613e-05, 1.756604991655633e-05, 1.845726394700672e-05, 1.939344237310855e-05, 2.037683423114873e-05, 2.140979952742888e-05, 2.249481459468731e-05, 2.363447769776064e-05, 2.483151489933599e-05, 2.608878619695023e-05, 2.740929194308764e-05, 2.879617956034143e-05, 3.025275056431062e-05, 3.178246790717541e-05, 3.338896365545659e-05, 3.507604701594935e-05, 3.684771272414758e-05, 3.87081498101805e-05, 4.066175075761556e-05, 4.271312107099571e-05, 4.486708926858499e-05, 4.712871731719358e-05, 4.95033115264495e-05, 
5.199643392052583e-05, 5.461391410571983e-05, 5.736186165271205e-05, 6.024667901313331e-05, 6.327507499012023e-05, 6.645407878340968e-05, 6.979105462972426e-05, 7.329371705985078e-05, 7.697014679394617e-05, 8.08288072973724e-05, 8.487856201945274e-05, 8.912869233790657e-05, 9.358891623225634e-05, 9.826940770934464e-05, 0.0001031808170046963, 0.0001083342915833646, 0.0001137414979639949, 0.0001194146443899481, 0.0001253665043711606, 0.0001316104411202618, 0.0001381604329061833, 0.0001450310993481658, 0.0001522377286724839, 0.0001597963059536332, 0.0001677235423608623, 0.000176036905430045, 0.0001847546503797043, 0.0001938958524886172, 0.000203480440551038, 0.0002135292314235744, 0.000224063965675948, 0.0002351073443553553, 0.0002466830668716754, 0.0002588158700077851, 0.0002715315680559928, 0.0002848570940778976, 0.0002988205422810243, 0.0003134512115009738, 0.0003287796497730375, 0.0003448376999715452, 0.000361658546489543, 0.0003792767629245038, 0.0003977283607288938, 0.0004170508387764083, 0.000437283233786336, 0.0004584661715391705, 0.0004806419188065966, 0.0005038544359082033, 0.0005281494297954813, 0.0005535744075511128, 0.0005801787301779163, 0.0006080136665372141, 0.0006371324472807508, 0.0006675903186034633, 0.0006994445956264628, 0.0007327547152004819, 0.0007675822878994821, 0.0008039911489526439, 0.0008420474078396103, 0.000881819496249682, 0.0009233782140796808, 0.0009667967731179235, 0.001012150838033072, 0.001059518564256344, 0.001108980632314022, 0.001160620278133868, 0.001214523318814656, 0.001270778173311964, 0.001329475877456019, 0.001390710092678883, 0.001454577107788282, 0.001521175833084782, 0.001590607786076903, 0.001662977068006581, 0.001738390330353981, 0.001816956730447595, 0.001898787875261968, 0.001983997752442427, 0.002072702647553957, 0.002165021046509997, 0.00226107352209748, 0.002360982603477147, 0.002464872627503374, 0.002572869570677038, 0.002685100860517964, 0.002801695165121952, 0.002922782159651937, 0.003048492268504245, 
0.003178956381890986, 0.003314305545588662, 0.003454670622623362, 0.00360018192569506, 0.003750968819189603, 0.003907159289688434, 0.004068879483964478, 0.004236253213549933, 0.004409401425080083, 0.004588441635757681, 0.004773487333448692, 0.004964647341112116, 0.005162025145488284, 0.005365718190222362, 0.005575817133884741, 0.005792405073670778, 0.006015556735918573, 0.006245337634978997, 0.006481803202406862, 0.006724997888917093, 0.006974954242066845, 0.007231691963181658, 0.007495216947642457, 0.007765520313288028, 0.00804257742236208, 0.00832634690314369, 0.008616769678137693, 0.00891376800646424, 0.009217244548865671, 0.009527081464534475, 0.009843139549749864, 0.01016525742907554, 0.01049325081060568, 0.0108269118174295, 0.01116600840809671, 0.011510283899384, 0.01185945660505667, 0.01221321960456193, 0.01257124065564639, 0.01293316226472083, 0.01329860192836342, 0.01366715255860949, 0.01403838310357796, 0.0144118393734793, 0.01478704508008535, 0.01516350309526388, 0.01554069693113345, 0.01591809244072374, 0.01629513973367906, 0.01667127529646678, 0.01704592430070572, 0.01741850307657123, 0.01778842172073921, 0.01815508679998579, 0.01851790410236462, 0.01887628137786603, 0.0192296309996676, 0.0195773724655948, 0.01991893464733652, 0.02025375768245689, 0.02058129439151377, 0.02090101108988542, 0.02121238765153072, 0.02151491667023636, 0.02180810155337436, 0.02209145337431778, 0.0223644863030222, 0.02262671143053819, 0.02287762880310647, 0.0231167174858078, 0.0233434234853631, 0.02355714537752501, 0.023757217507535, 0.0239428906633172, 0.02411331016141768, 0.02426749133611229, 0.02440429248345839, 0.0245223853850962, 0.02462022362188074, 0.02469619870621116, 0.02474928617385816, 0.02477865514424593, 0.02478350989414503, 0.02476309542130178, 0.02471670321013747, 0.02464367717856114, 0.0245434197811817, 0.02441539824021471, 0.02425915087119654, 0.02407429346624619, 0.02386052569306334, 0.02361763746319967, 0.02334551521836331, 0.02304414807870417, 
0.02271363379223196, 0.02235418441974946, 0.02196613168509073, 0.02154993191603391, 0.02110617049714769, 0.02063556575213001, 0.02013897216995272, 0.01961738288654577, 0.01907193133187252, 0.01850389195124905, 0.01791467990979526, 0.01730584969005669, 0.01667909249535166, 0.01603623237535121, 0.01537922099599354, 0.01471013098324647, 0.01403114777954115, 0.01334455996316752, 0.01265274799457056, 0.0119581713695123, 0.01126335417754942, 0.01057086908523541, 0.009883319787009848, 0.009203321992775237, 0.008533483049662033, 0.007876380326310911, 0.007234538520882072, 0.006610406088707471, 0.006006331021547379, 0.005424536247328055, 0.004867094956375134, 0.004335906196709344, 0.003832671116100611, 0.003358870261188918, 0.002915742372948615, 0.002504265141815537, 0.002125138403524629, 0.0017787702666823, 0.001465266663820908, 0.001184424807646134, 0.0009357310119201545, 0.0007183633005151402, 0.0005311991773237768, 0.0003728288627935228, 0.0002415742189002812, 0.0001355134826467874, 5.251180823834245e-05, -9.7425202870427e-06, -5.369650081774036e-05, -8.188591851501781e-05, -9.688460515688835e-05, -0.0001012503194429692, -9.746833532456312e-05, -8.789405637771308e-05, -7.46962128506945e-05, -5.980244023162662e-05, -4.484927886408419e-05, -3.113886739579408e-05, -1.960482123725127e-05, -1.078998224465777e-05, -4.838887449121199e-06, -1.507921012861013e-06, -1.961714501674816e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 3 + }, + { + "i": 3, + "radial_function": [ + 1.61432063066176e-09, 1.655187527149781e-09, 1.697088983028386e-09, 1.740051188753669e-09, 1.784100997819904e-09, 1.829265943545613e-09, 1.875574256284684e-09, 1.923054881073251e-09, 1.97173749572335e-09, 2.021652529374734e-09, 2.072831181516357e-09, 2.125305441489483e-09, 2.17910810848464e-09, 2.234272812044829e-09, 2.290834033087929e-09, 2.348827125461295e-09, 2.408288338042235e-09, 2.469254837397947e-09, 2.531764731019315e-09, 2.595857091142938e-09, 2.661571979176297e-09, 2.728950470741486e-09, 2.798034681352987e-09, 2.868867792745643e-09, 2.941494079869299e-09, 3.015958938566933e-09, 3.092308913953714e-09, 3.170591729514532e-09, 3.250856316938438e-09, 3.333152846708504e-09, 3.417532759466236e-09, 3.504048798170234e-09, 3.592755041069199e-09, 3.683706935509835e-09, 3.776961332600944e-09, 3.872576522755211e-09, 3.970612272131046e-09, 4.071129859997318e-09, 4.17419211704417e-09, 4.279863464664122e-09, 4.388209955227802e-09, 4.499299313379687e-09, 
4.613200978379655e-09, 4.729986147516729e-09, 4.849727820622334e-09, 4.972500845710789e-09, 5.098381965775686e-09, 5.227449866771452e-09, 5.359785226809943e-09, 5.495470766603159e-09, 5.634591301183331e-09, 5.777233792933002e-09, 5.923487405958128e-09, 6.073443561838287e-09, 6.227195996789065e-09, 6.38484082027186e-09, 6.546476575088575e-09, 6.712204298998127e-09, 6.882127587893764e-09, 7.056352660580546e-09, 7.23498842519357e-09, 7.418146547298627e-09, 7.605941519717785e-09, 7.798490734123762e-09, 7.995914554447754e-09, 8.198336392146922e-09, 8.405882783378259e-09, 8.618683468127814e-09, 8.836871471344097e-09, 9.060583186127093e-09, 9.28995845902461e-09, 9.525140677489495e-09, 9.766276859552628e-09, 1.001351774576769e-08, 1.02670178934854e-08, 1.052693577351632e-08, 1.079343386924257e-08, 1.1066678778241e-08, 1.134684131648101e-08, 1.163409662516271e-08, 1.192862428026197e-08, 1.223060840485153e-08, 1.25402377842682e-08, 1.285770598419825e-08, 1.318321147175568e-08, 1.351695773962826e-08, 1.38591534333702e-08, 1.421001248192043e-08, 1.456975423142914e-08, 1.493860358247568e-08, 1.53167911307644e-08, 1.570455331138682e-08, 1.610213254674008e-08, 1.650977739819488e-08, 1.692774272160796e-08, 1.735628982677682e-08, 1.779568664093648e-08, 1.8246207876401e-08, 1.870813520245542e-08, 1.918175742160516e-08, 1.966737065029438e-08, 2.016527850420622e-08, 2.06757922882615e-08, 2.119923119143538e-08, 2.173592248651392e-08, 2.228620173491637e-08, 2.285041299671166e-08, 2.342890904596147e-08, 2.402205159152447e-08, 2.46302115034614e-08, 2.52537690451825e-08, 2.589311411148367e-08, 2.654864647262136e-08, 2.722077602457876e-08, 2.790992304568154e-08, 2.861651845972438e-08, 2.93410041057734e-08, 3.008383301481514e-08, 3.084546969342532e-08, 3.162639041463643e-08, 3.242708351618719e-08, 3.324804970634229e-08, 3.408980237747326e-08, 3.495286792760045e-08, 3.583778609009707e-08, 3.674511027176333e-08, 3.767540789948404e-08, 3.862926077568894e-08, 3.96072654428376e-08, 
4.061003355716293e-08, 4.163819227190352e-08, 4.269238463027223e-08, 4.377326996840549e-08, 4.488152432854875e-08, 4.601784088273991e-08, 4.718293036725654e-08, 4.837752152810313e-08, 4.96023615778192e-08, 5.085821666389722e-08, 5.214587234910636e-08, 5.346613410402608e-08, 5.481982781210126e-08, 5.620780028753907e-08, 5.763091980637414e-08, 5.909007665104028e-08, 6.058618366879155e-08, 6.212017684432929e-08, 6.36930158869961e-08, 6.530568483290914e-08, 6.69591926624175e-08, 6.865457393327142e-08, 7.039288942990967e-08, 7.217522682927386e-08, 7.400270138357551e-08, 7.587645662044914e-08, 7.779766506093557e-08, 7.976752895575332e-08, 8.178728104032782e-08, 8.385818530905735e-08, 8.598153780931065e-08, 8.815866745565955e-08, 9.039093686487166e-08, 9.267974321218932e-08, 9.502651910944507e-08, 9.743273350557642e-08, 9.989989261010904e-08, 1.024295408402077e-07, 1.050232617918922e-07, 1.076826792360452e-07, 1.104094581398529e-07, 1.132053057143216e-07, 1.160719724885583e-07, 1.190112534114907e-07, 1.220249889817403e-07, 1.251150664063789e-07, 1.28283420789301e-07, 1.315320363499945e-07, 1.348629476734769e-07, 1.38278240992215e-07, 1.417800555008515e-07, 1.453705847045817e-07, 1.490520778020618e-07, 1.528268411037333e-07, 1.56697239486486e-07, 1.606656978856022e-07, 1.647347028249428e-07, 1.68906803986383e-07, 1.731846158194933e-07, 1.775708191925372e-07, 1.820681630858405e-07, 1.86679466328645e-07, 1.91407619380577e-07, 1.962555861588902e-07, 2.012264059126798e-07, 2.063231951452927e-07, 2.115491495861896e-07, 2.169075462135563e-07, 2.22401745328987e-07, 2.280351926856045e-07, 2.338114216710179e-07, 2.397340555465481e-07, 2.458068097442148e-07, 2.520334942229757e-07, 2.584180158858034e-07, 2.649643810591764e-07, 2.716766980366433e-07, 2.785591796881431e-07, 2.856161461368139e-07, 2.928520275050758e-07, 3.002713667318197e-07, 3.078788224625762e-07, 3.15679172014613e-07, 3.236773144189223e-07, 3.318782735411693e-07, 3.402872012836776e-07, 3.489093808706095e-07, 
3.577502302185756e-07, 3.668153053949237e-07, 3.761103041660749e-07, 3.856410696382917e-07, 3.954135939933654e-07, 4.054340223217693e-07, 4.157086565558714e-07, 4.262439595059253e-07, 4.370465590015874e-07, 4.48123252141794e-07, 4.594810096559572e-07, 4.711269803794408e-07, 4.830684958464531e-07, 4.953130750035079e-07, 5.078684290467426e-07, 5.207424663864578e-07, 5.339432977423397e-07, 5.474792413729306e-07, 5.613588284430257e-07, 5.755908085327436e-07, 5.901841552922044e-07, 6.051480722457533e-07, 6.204919987498984e-07, 6.36225616109167e-07, 6.523588538542463e-07, 6.689018961869247e-07, 6.858651885964266e-07, 7.032594446519e-07, 7.210956529760139e-07, 7.393850844046363e-07, 7.581392993378589e-07, 7.773701552877079e-07, 7.970898146280418e-07, 8.173107525523648e-07, 8.380457652453814e-07, 8.593079782743598e-07, 8.811108552065043e-07, 9.034682064587806e-07, 9.26394198386807e-07, 9.499033626196007e-07, 9.740106056472585e-07, 9.987312186687857e-07, 1.024080887707572e-06, 1.050075704002243e-06, 1.07673217468079e-06, 1.104067233726261e-06, 1.13209825324242e-06, 1.160843055028161e-06, 1.190319922469669e-06, 1.220547612759651e-06, 1.251545369453241e-06, 1.283332935370504e-06, 1.315930565855792e-06, 1.349359042404522e-06, 1.383639686668298e-06, 1.418794374849671e-06, 1.454845552498209e-06, 1.49181624971988e-06, 1.529730096812256e-06, 1.568611340338328e-06, 1.608484859652326e-06, 1.649376183891158e-06, 1.691311509445802e-06, 1.734317717927289e-06, 1.778422394642431e-06, 1.823653847595114e-06, 1.870041127029303e-06, 1.917614045530634e-06, 1.966403198704032e-06, 2.016439986445262e-06, 2.067756634825173e-06, 2.120386218605832e-06, 2.174362684408585e-06, 2.229720874554749e-06, 2.28649655160029e-06, 2.344726423586733e-06, 2.404448170031273e-06, 2.465700468679955e-06, 2.52852302304848e-06, 2.592956590776389e-06, 2.659043012820995e-06, 2.726825243518608e-06, 2.796347381541588e-06, 2.867654701780706e-06, 2.940793688183554e-06, 3.015812067580724e-06, 3.09275884453276e-06, 
3.171684337232196e-06, 3.252640214496142e-06, 3.335679533886145e-06, 3.420856780994023e-06, 3.508227909932859e-06, 3.597850385074889e-06, 3.689783224078895e-06, 3.784087042251645e-06, 3.880824098289772e-06, 3.980058341449928e-06, 4.081855460197315e-06, 4.186282932384426e-06, 4.293410077013944e-06, 4.403308107641919e-06, 4.516050187479376e-06, 4.631711486253368e-06, 4.750369238889998e-06, 4.872102806085588e-06, 4.996993736833772e-06, 5.125125832979898e-06, 5.256585215876325e-06, 5.391460395215829e-06, 5.529842340122974e-06, 5.671824552586801e-06, 5.817503143321884e-06, 5.966976910147729e-06, 6.120347418980941e-06, 6.27771908753819e-06, 6.439199271851796e-06, 6.604898355704816e-06, 6.77492984309607e-06, 6.949410453850986e-06, 7.12846022249854e-06, 7.312202600539634e-06, 7.500764562238386e-06, 7.694276714072084e-06, 7.892873407982389e-06, 8.09669285857657e-06, 8.305877264432471e-06, 8.520572933669963e-06, 8.740930413956199e-06, 8.96710462712097e-06, 9.19925500856526e-06, 9.437545651654392e-06, 9.682145457295307e-06, 9.933228288906474e-06, 1.019097313299797e-05, 1.045556426558872e-05, 1.072719142469776e-05, 1.10060499891578e-05, 1.129234116400846e-05, 1.158627217274015e-05, 1.188805645666986e-05, 1.219791388174333e-05, 1.25160709530718e-05, 1.28427610375235e-05, 1.317822459470683e-05, 1.352270941669482e-05, 1.387647087685803e-05, 1.423977218818834e-05, 1.461288467151375e-05, 1.499608803402292e-05, 1.538967065853566e-05, 1.579392990397647e-05, 1.620917241752881e-05, 1.66357144589685e-05, 1.707388223769895e-05, 1.752401226303229e-05, 1.798645170828775e-05, 1.846155878930253e-05, 1.894970315797911e-05, 1.94512663115198e-05, 1.996664201803097e-05, 2.049623675920826e-05, 2.104047019084883e-05, 2.159977562196909e-05, 2.217460051334284e-05, 2.276540699631186e-05, 2.337267241275968e-05, 2.399688987718101e-05, 2.463856886182021e-05, 2.529823580589934e-05, 2.597643475000003e-05, 2.6673727996716e-05, 2.739069679873963e-05, 2.812794207560394e-05, 2.888608516035417e-05, 
2.966576857748228e-05, 3.046765685352032e-05, 3.129243736174997e-05, 3.214082120255436e-05, 3.301354412100783e-05, 3.391136746337091e-05, 3.4835079174236e-05, 3.578549483614757e-05, 3.676345875360384e-05, 3.776984508343717e-05, 3.880555901365459e-05, 3.987153799292356e-05, 4.096875301297873e-05, 4.209820994633612e-05, 4.326095094180474e-05, 4.445805588040055e-05, 4.569064389438622e-05, 4.69598749522792e-05, 4.826695151280636e-05, 4.961312025090984e-05, 5.09996738590528e-05, 5.242795292721914e-05, 5.38993479051513e-05, 5.541530115053193e-05, 5.697730906697669e-05, 5.858692433588076e-05, 6.024575824634026e-05, 6.195548312755294e-05, 6.371783488830502e-05, 6.553461566834454e-05, 6.740769660665893e-05, 6.933902073189233e-05, 7.133060598036247e-05, 7.338454834738204e-05, 7.55030251778276e-05, 7.768829860216312e-05, 7.994271912438591e-05, 8.22687293686427e-05, 8.466886799154832e-05, 8.714577376754385e-05, 8.970218985492772e-05, 9.234096825052942e-05, 9.506507444131471e-05, 9.78775922615641e-05, 0.0001007817289646166, 0.0001037808205185453, 0.000106878337135509, 0.0001100778890449174, 0.0001133832325209607, 0.0001167982761754687, 0.0001203270875275026, 0.0001239738998615281, 0.0001277431193864796, 0.0001316393327085023, 0.0001356673146306446, 0.0001398320362932758, 0.0001441386736695196, 0.0001485926164305216, 0.0001531994771959114, 0.0001579651011853751, 0.0001628955762878178, 0.0001679972435651742, 0.0001732767082085195, 0.0001787408509647271, 0.0001843968400525355, 0.0001902521435874988, 0.0001963145425359307, 0.0002025921442185744, 0.0002090933963853818, 0.0002158271018834165, 0.0002228024339405446, 0.0002300289520882239, 0.0002375166187473306, 0.0002452758165016212, 0.0002533173660840324, 0.0002616525451016579, 0.0002702931075258431, 0.0002792513039744072, 0.0002885399028136061, 0.0002981722121079424, 0.0003081621024464778, 0.0003185240306747536, 0.0003292730645618514, 0.000340424908432534, 0.0003519959297947024, 0.0003640031869926936, 0.0003764644579171237, 
0.0003893982698020969, 0.0004028239301406399, 0.000416761558749128, 0.0004312321210113054, 0.0004462574623321849, 0.0004618603438316753, 0.0004780644793072086, 0.0004948945734938784, 0.0005123763616496679, 0.0005305366504922526, 0.0005494033605124784, 0.0005690055696880997, 0.0005893735586195094, 0.0006105388571070958, 0.0006325342921875069, 0.0006553940376433211, 0.0006791536649976442, 0.0007038501960016504, 0.0007295221566192679, 0.0007562096325089603, 0.0007839543259977312, 0.0008127996145373568, 0.0008427906106269538, 0.0008739742231797049, 0.0009063992203045799, 0.0009401162934662428, 0.0009751781229780642, 0.001011639444774064, 0.001049557118395762, 0.001088990196119276, 0.001129999993136377, 0.001172650158690803, 0.001217006748057602, 0.001263138295238743, 0.001311115886232765, 0.001361013232719253, 0.001412906745981321, 0.001466875610869777, 0.001523001859592404, 0.001581370445089864, 0.001642069313736407, 0.001705189477079109, 0.001770825082302949, 0.001839073481081461, 0.001910035296443331, 0.00198381448725444, 0.002060518409882325, 0.00214025787657584, 0.002223147210056934, 0.00230930429378402, 0.002398850617307148, 0.002491911316094587, 0.002588615205167943, 0.002689094805839065, 0.002793486364796802, 0.002901929864744799, 0.00301456902574385, 0.003131551296363153, 0.003253027833695088, 0.003379153471237488, 0.0035100866735963, 0.003645989476910571, 0.00378702741385038, 0.00393336942198806, 0.004085187734293407, 0.004242657750455302, 0.004405957887686314, 0.004575269409622727, 0.004750776231892225, 0.004932664702884735, 0.005121123358229836, 0.005316342647457933, 0.005518514631302224, 0.00572783264808595, 0.005944490947635681, 0.006168684291166739, 0.006400607515604184, 0.006640455060831362, 0.00688842045840114, 0.007144695780302981, 0.007409471046453596, 0.007682933589672711, 0.007965267377018373, 0.00825665228649162, 0.008557263338279403, 0.008867269879888074, 0.009186834724732225, 0.009516113243982627, 0.009855252411748384, 0.01020438980397138, 
0.01056365255174653, 0.01093315625015383, 0.01131300382409411, 0.01170328435306476, 0.01210407185729224, 0.01251542404815571, 0.01293738104639091, 0.01336996407215299, 0.01381317411164027, 0.01426699056563636, 0.01473136988600969, 0.01520624420691687, 0.01569151997817917, 0.01618707660903729, 0.01669276513122841, 0.01720840689106049, 0.01773379228087477, 0.01826867952097001, 0.01881279350370121, 0.0193658247120419, 0.0199274282253932, 0.02049722282581779, 0.0210747902181422, 0.02165967437748906, 0.02225138103773805, 0.02284937733414328, 0.02345309161282542, 0.02406191341907013, 0.0246751936752721, 0.0252922450579247, 0.02591234258123422, 0.02653472439270015, 0.02715859278330893, 0.02778311541180877, 0.02840742673883366, 0.02903062966240003, 0.02965179734148995, 0.03026997518904441, 0.03088418300971967, 0.03149341725121348, 0.0320966533308715, 0.03269284799167689, 0.03328094163366662, 0.0338598605583903, 0.03442851905534198, 0.03498582125048805, 0.0355306626282632, 0.0360619311299139, 0.03657850772309035, 0.03707926633041077, 0.03756307299868584, 0.03802878418597927, 0.0384752440411257, 0.03890128055020814, 0.0393057004273431, 0.03968728263349602, 0.04004477041756539, 0.04037686178925699, 0.0406821983539814, 0.04095935246679565, 0.04120681269591892, 0.04142296762718324, 0.04160608808946906, 0.0417543079381637, 0.04186560359927091, 0.04193794003300193, 0.04196984343544428, 0.04196005316010078, 0.04190738538045276, 0.04181074282295429, 0.04166912472925695, 0.04148163699732178, 0.04124750244422729, 0.0409660711263703, 0.04063683064547435, 0.04025941636140393, 0.03983362142528263, 0.03935940653898817, 0.03883690933974907, 0.03826645330148068, 0.03764855603778486, 0.03698393688528281, 0.03627352364040012, 0.03551845831794961, 0.03472010179609811, 0.03388003720975558, 0.03300007195322491, 0.03208223815341324, 0.03112879147715703, 0.03014220814052843, 0.02912517999461101, 0.0280806075713135, 0.02701159098463915, 0.02592141859756983, 0.02481355338258154, 
0.02369161692495592, 0.02255937104253566, 0.0214206970235844, 0.02027957251585555, 0.01914004613489265, 0.01800620989783233, 0.016882169630308, 0.01577201353822874, 0.01467977918271737, 0.01360941914483424, 0.01256476571617748, 0.01154949500116795, 0.01056709086582733, 0.009620809214927445, 0.008713643123240328, 0.007848289385726542, 0.007027117084249607, 0.006252138793048784, 0.005524985059893267, 0.004846882802712837, 0.004218638250642143, 0.003640625032118427, 0.003112777969166924, 0.002634593075011821, 0.002205134170481737, 0.001823046432676445, 0.001486577066891204, 0.001193603150157707, 0.000941666533112312, 0.0007280155079800732, 0.0005496527567961096, 0.0004033888891133153, 0.0002859006664768515, 0.0001937927972246595, 0.0001236619757281482, 7.216164201320763e-05, 3.606575889812669e-05, 1.232975295359373e-05, -1.853347426611363e-06, -9.003608523239226e-06, -1.131376007416918e-05, -1.061960972645326e-05, -8.383130727457333e-06, -5.689811783761434e-06, -3.261516315911095e-06, -1.485644385623708e-06, -4.608169887967972e-07, -5.860474730848275e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 0, + "j": 3 + }, + { + "i": 3, + "radial_function": [ + 7.627687069263136e-15, 8.018766884935741e-15, 8.42989778660756e-15, 8.862107815058022e-15, 9.316477719814657e-15, 9.794143661585944e-15, 1.029630005325085e-14, 1.082420254650887e-14, 1.137917117165838e-14, 1.196259363835512e-14, 1.257592880560344e-14, 1.322071032965784e-14, 1.389855049895605e-14, 1.461114426567302e-14, 1.536027348397701e-14, 1.614781136558475e-14, 1.697572716375808e-14, 1.784609109745349e-14, 1.876107952793774e-14, 1.972298040081407e-14, 2.073419896706602e-14, 2.179726379742527e-14, 2.291483310510209e-14, 2.408970139268825e-14, 2.532480643985366e-14, 2.66232366493084e-14, 2.79882387694002e-14, 2.942322601265672e-14, 3.093178659057337e-14, 3.25176926859886e-14, 3.418490988548065e-14, 3.593760709537337e-14, 3.778016696614474e-14, 3.971719685130488e-14, 4.175354032814697e-14, 4.389428930917684e-14, 4.614479677450809e-14, 4.851069015705896e-14, 5.099788541402089e-14, 5.361260181978493e-14, 5.636137751731425e-14, 5.925108586685142e-14, 6.228895263283828e-14, 6.548257405202543e-14, 6.883993582795125e-14, 7.236943309928271e-14, 7.607989143195421e-14, 7.998058888759091e-14, 8.40812792234003e-14, 8.839221628154398e-14, 9.292417962897212e-14, 9.768850151183797e-14, 1.026970951918868e-13, 1.079624847356768e-13, 1.134978363311221e-13, 1.193169912096567e-13, 1.254345002563515e-13, 
1.318656603945183e-13, 1.386265528357847e-13, 1.457340832912842e-13, 1.532060242445033e-13, 1.610610593914968e-13, 1.693188303595882e-13, 1.779999858213747e-13, 1.871262331268513e-13, 1.967203925827485e-13, 2.068064545148216e-13, 2.174096392557647e-13, 2.28556460208752e-13, 2.402747901443002e-13, 2.525939308962129e-13, 2.655446866308975e-13, 2.7915944087325e-13, 2.934722374817137e-13, 3.085188657749944e-13, 3.243369500232715e-13, 3.409660435276994e-13, 3.584477275234243e-13, 3.768257151534276e-13, 3.961459607731842e-13, 4.164567748594327e-13, 4.378089448104069e-13, 4.602558619395637e-13, 4.83853654980366e-13, 5.086613304359409e-13, 5.347409201245396e-13, 5.621576362897566e-13, 5.909800346633307e-13, 6.212801858882736e-13, 6.531338557309657e-13, 6.86620694532805e-13, 7.218244363751648e-13, 7.588331084556329e-13, 7.977392511990745e-13, 8.386401496539109e-13, 8.816380767521709e-13, 9.268405490416218e-13, 9.74360595529387e-13, 1.024317040309292e-12, 1.076834799679649e-12, 1.132045194494344e-12, 1.190086278528332e-12, 1.251103183678511e-12, 1.315248482863184e-12, 1.382682571527496e-12, 1.453574068708728e-12, 1.528100238664382e-12, 1.606447434117195e-12, 1.688811562225461e-12, 1.775398574443758e-12, 1.866424981498885e-12, 1.962118394768753e-12, 2.062718095417828e-12, 2.168475632712219e-12, 2.279655453010467e-12, 2.39653556100268e-12, 2.519408214851485e-12, 2.648580656972856e-12, 2.784375882284056e-12, 2.927133445839665e-12, 3.077210311874983e-12, 3.2349817463799e-12, 3.400842255434846e-12, 3.575206571655128e-12, 3.758510691210098e-12, 3.951212964009988e-12, 4.153795239786445e-12, 4.366764072932248e-12, 4.590651989112761e-12, 4.826018816816188e-12, 5.073453087171723e-12, 5.333573505535963e-12, 5.607030498526738e-12, 5.894507840372629e-12, 6.19672436264462e-12, 6.514435751644471e-12, 6.848436437944245e-12, 7.199561582801218e-12, 7.568689166414749e-12, 7.956742183246693e-12, 8.364690949893804e-12, 8.793555531283008e-12, 9.244408291255448e-12, 9.718376573916404e-12, 
1.021664552245555e-11, 1.074046104248475e-11, 1.129113291730319e-11, 1.187003808287834e-11, 1.247862407073108e-11, 1.311841262733336e-11, 1.379100351906701e-11, 1.449807853225786e-11, 1.524140567828531e-11, 1.602284361428163e-11, 1.684434629047345e-11, 1.770796783578407e-11, 1.861586769391269e-11, 1.957031602273079e-11, 2.057369937049513e-11, 2.162852664306951e-11, 2.273743537707291e-11, 2.39031983346394e-11, 2.512873043627635e-11, 2.641709604915419e-11, 2.777151664905038e-11, 2.91953788751003e-11, 3.069224299749741e-11, 3.226585181930712e-11, 3.392014003465393e-11, 3.565924406667621e-11, 3.748751240984193e-11, 3.940951650248677e-11, 4.143006215674949e-11, 4.355420157448504e-11, 4.578724597919284e-11, 4.813477889553774e-11, 5.060267010966858e-11, 5.319709034522532e-11, 5.592452669173226e-11, 5.879179882394308e-11, 6.180607605268276e-11, 6.497489524981895e-11, 6.830617969216317e-11, 7.180825887141762e-11, 7.548988931968557e-11, 7.936027650260255e-11, 8.34290978348269e-11, 8.770652687541054e-11, 9.220325876354352e-11, 9.693053695825198e-11, 1.019001813488867e-10, 1.071246178066834e-10, 1.126169092512479e-10, 1.183907883096364e-10, 1.244606916496625e-10, 1.30841796073244e-10, 1.375500564600274e-10, 1.446022456561099e-10, 1.520159964075834e-10, 1.598098454437068e-10, 1.680032798198844e-10, 1.766167856363093e-10, 1.856718992634176e-10, 1.951912611468266e-10, 2.051986724549822e-10, 2.157191545605124e-10, 2.267790116311086e-10, 2.384058962706804e-10, 2.506288788337299e-10, 2.634785199092297e-10, 2.769869468532144e-10, 2.911879340770572e-10, 3.061169874279028e-10, 3.218114329862034e-10, 3.383105104590814e-10, 3.556554710450368e-10, 3.738896808178952e-10, 3.930587289572408e-10, 4.132105418329829e-10, 4.343955026603152e-10, 4.566665776051885e-10, 4.800794481221253e-10, 5.046926500887482e-10, 5.305677201956565e-10, 5.577693497538146e-10, 5.863655464279764e-10, 6.16427804132898e-10, 6.480312818800167e-10, 6.812549915073072e-10, 7.161819953074983e-10, 7.528996128629974e-10, 
7.914996427254706e-10, 8.320785851950892e-10, 8.74737889194598e-10, 9.195842066511597e-10, 9.667296516693387e-10, 1.016292089954076e-09, 1.068395426868779e-09, 1.123169919833567e-09, 1.180752502106494e-09, 1.241287129193337e-09, 1.304925133628463e-09, 1.371825604476787e-09, 1.442155787830663e-09, 1.516091501500501e-09, 1.593817575618768e-09, 1.675528314599428e-09, 1.761427982985843e-09, 1.851731315193149e-09, 1.946664051059625e-09, 2.046463505132115e-09, 2.151379151517743e-09, 2.261673253511792e-09, 2.37762151879493e-09, 2.499513784055808e-09, 2.6276547429191e-09, 2.762364706170376e-09, 2.903980401418521e-09, 3.052855816461879e-09, 3.209363081973821e-09, 3.37389339924113e-09, 3.546858023377279e-09, 3.728689285536336e-09, 3.919841676514196e-09, 4.120792977491913e-09, 4.332045457676808e-09, 4.554127131923954e-09, 4.787593070320534e-09, 5.033026791992855e-09, 5.291041722910057e-09, 5.562282725064831e-09, 5.847427706872568e-09, 6.147189319184251e-09, 6.462316736412864e-09, 6.793597519749903e-09, 7.141859594962362e-09, 7.50797331225673e-09, 7.892853619732388e-09, 8.297462363547858e-09, 8.722810648257957e-09, 9.169961434323363e-09, 9.640032106297762e-09, 1.013419732685188e-08, 1.065369196099248e-08, 1.119981410572622e-08, 1.17739284119336e-08, 1.237746942249207e-08, 1.301194521790319e-08, 1.367894110355123e-08, 1.438012363661404e-08, 1.511724474056983e-08, 1.589214611249582e-08, 1.670676379552663e-08, 1.756313302035757e-08, 1.846339328512057e-08, 1.940979367191162e-08, 2.040469851643293e-08, 2.14505932467821e-08, 2.255009061472911e-08, 2.370593719756625e-08, 2.492102027545679e-08, 2.61983750419898e-08, 2.754119212344086e-08, 2.895282557001472e-08, 3.043680123129894e-08, 3.19968255592567e-08, 3.363679481904233e-08, 3.536080479952855e-08, 3.717316108326186e-08, 3.907838973220311e-08, 4.108124858297368e-08, 4.3186739129473e-08, 4.540011895043531e-08, 4.772691486695889e-08, 5.017293669349737e-08, 5.274429167817269e-08, 5.544739981265933e-08, 5.828900976700187e-08, 
6.127621569989086e-08, 6.441647495246446e-08, 6.771762666200051e-08, 7.11879112292532e-08, 7.483599085800336e-08, 7.867097117784231e-08, 8.270242385903204e-08, 8.694041046069411e-08, 9.139550753490927e-08, 9.607883287036323e-08, 1.010020732461183e-07, 1.061775134629932e-07, 1.11618066947153e-07, 1.173373078667148e-07, 1.233495049578654e-07, 1.296696569241301e-07, 1.36313529833003e-07, 1.432976962964816e-07, 1.506395766497267e-07, 1.583574823384675e-07, 1.66470661377827e-07, 1.749993462253833e-07, 1.839648040621952e-07, 1.933893896520262e-07, 2.032966008370474e-07, 2.137111369798716e-07, 2.246589602429118e-07, 2.361673601131564e-07, 2.482650211134626e-07, 2.60982094045216e-07, 2.743502708079519e-07, 2.884028630317553e-07, 3.031748847320616e-07, 3.18703139167942e-07, 3.350263101140472e-07, 3.521850577516702e-07, 3.702221194934868e-07, 3.891824159098174e-07, 4.091131619705298e-07, 4.300639840961954e-07, 4.520870430085793e-07, 4.752371629036024e-07, 4.995719671376516e-07, 5.251520208963668e-07, 5.520409809590502e-07, 5.803057532402544e-07, 6.100166581538188e-07, 6.412476045070379e-07, 6.740762721190244e-07, 7.085843037814884e-07, 7.448575068489311e-07, 7.829860651525411e-07, 8.230647615846028e-07, 8.651932119573825e-07, 9.094761107408187e-07, 9.560234892049384e-07, 1.004950986629658e-06, 1.05638013524014e-06, 1.110438659564128e-06, 1.167260790860414e-06, 1.226987597535948e-06, 1.289767332136644e-06, 1.355755795951162e-06, 1.425116721961117e-06, 1.498022177100953e-06, 1.574652984906877e-06, 1.655199169271663e-06, 1.739860420810135e-06, 1.828846586450155e-06, 1.922378183858698e-06, 2.020686941660001e-06, 2.124016366877462e-06, 2.232622340803278e-06, 2.346773744896789e-06, 2.466753118050029e-06, 2.592857346854011e-06, 2.725398390496856e-06, 2.864704041979568e-06, 3.011118727452809e-06, 3.165004345563497e-06, 3.32674114887083e-06, 3.496728669244394e-06, 3.67538668952539e-06, 3.863156263696637e-06, 4.060500788042116e-06, 4.267907125544722e-06, 4.485886786464008e-06, 
4.714977167655241e-06, 4.955742853465939e-06, 5.208776981379673e-06, 5.474702675427901e-06, 5.754174550822973e-06, 6.047880293067713e-06, 6.356542315434439e-06, 6.680919498314035e-06, 7.0218090147199e-06, 7.38004824585005e-06, 7.756516791274278e-06, 8.15213857812986e-06, 8.567884074261484e-06, 9.004772610156005e-06, 9.463874814994939e-06, 9.946315172336047e-06, 1.045327470096291e-05, 1.098599376705731e-05, 1.15457750339046e-05, 1.213398655560537e-05, 1.275206502170839e-05, 1.340151915983747e-05, 1.408393330389552e-05, 1.480097113544903e-05, 1.555437960666141e-05, 1.634599305305653e-05, 1.717773750522302e-05, 1.805163520848431e-05, 1.896980936051946e-05, 1.993448907662897e-05, 2.094801459364619e-05, 2.201284272300704e-05, 2.313155256484259e-05, 2.430685149480421e-05, 2.554158143607626e-05, 2.683872542964326e-05, 2.820141451629439e-05, 2.963293494428266e-05, 3.113673571763133e-05, 3.271643649977665e-05, 3.437583588900266e-05, 3.611892008174358e-05, 3.794987194096471e-05, 3.987308048754233e-05, 4.189315083307991e-05, 4.401491457333002e-05, 4.624344066211849e-05, 4.85840467866855e-05, 5.104231126555394e-05, 5.362408549132803e-05, 5.633550694161323e-05, 5.918301278143949e-05, 6.217335408235216e-05, 6.531361068332124e-05, 6.861120672003573e-05, 7.207392684972327e-05, 7.570993319958343e-05, 7.952778306792696e-05, 8.353644740775903e-05, 8.774533012361546e-05, 9.216428821347406e-05, 9.680365278790351e-05, 0.0001016742510001984, 0.0001067874289216753, 0.0001121550753971821, 0.0001177896469168806, 0.0001237041935409358, 0.0001299123859147859, 0.0001364285434131501, 0.0001432676634516862, 0.0001504454520059973, 0.0001579783553780044, 0.0001658835932503068, 0.0001741791930696894, 0.0001828840258010458, 0.0001920178430932655, 0.0002016013158988224, 0.0002116560745886426, 0.0002222047506036862, 0.0002332710196842992, 0.0002448796467178699, 0.0002570565322446594, 0.0002698287606603731, 0.0002832246501534237, 0.0002972738044126102, 0.0003120071661398376, 0.0003274570723998475, 
0.00034365731183681, 0.000360643183784601, 0.000378451559294274, 0.0003971209440986831, 0.0004166915435297279, 0.0004372053293991712, 0.0004587061088482158, 0.0004812395951656101, 0.0005048534805668365, 0.0005295975109197942, 0.0005555235623941926, 0.0005826857200027205, 0.0006111403579923136, 0.0006409462220325994, 0.0006721645131370743, 0.0007048589732391022, 0.0007390959723309503, 0.0007749445970582735, 0.0008124767406458867, 0.0008517671940123871, 0.0008928937379112285, 0.000935937235914934, 0.0009809817280358275, 0.001028114524752099, 0.001077426301181384, 0.001129011191115508, 0.001182966880599561, 0.001239394700705747, 0.001298399719117619, 0.001360090830103197, 0.00142458084241588, 0.001491986564620216, 0.001562428887295028, 0.001636032861519492, 0.001712927772998054, 0.001793247211127701, 0.00187712913225641, 0.001964715916323676, 0.002056154416013786, 0.002151595997489839, 0.0022511965717106, 0.002355116615264734, 0.002463521179586563, 0.002576579887344958, 0.002694466914722971, 0.002817360958229315, 0.002945445184606016, 0.00307890716231775, 0.003217938773029833, 0.003362736101402978, 0.003513499301454658, 0.003670432437660283, 0.003833743298892512, 0.004003643183225483, 0.004180346651563242, 0.004364071247989037, 0.004555037184676799, 0.004753466989157814, 0.00495958511169744, 0.005173617490509786, 0.005395791072524, 0.005626333287417763, 0.0058654714726519, 0.006113432247279489, 0.00637044083236405, 0.006636720315928475, 0.006912490860472156, 0.007197968851240591, 0.007493365983614204, 0.007798888288204104, 0.008114735092504995, 0.00844109791826542, 0.008778159314093623, 0.009126091623230357, 0.00948505568689029, 0.009855199484104649, 0.01023665670959518, 0.01062954529187339, 0.01103396585449566, 0.01145000012421502, 0.01187770929065519, 0.0123171323230964, 0.01276828425100116, 0.0132311544160246, 0.01370570470444348, 0.01419186777019707, 0.01468954526005736, 0.0151986060538243, 0.01571888453386681, 0.01625017889978678, 0.01679224954545363, 
0.01734481751712487, 0.01790756307280296, 0.01848012436435815, 0.01906209626523321, 0.01965302936770119, 0.02025242917462939, 0.02085975551145708, 0.02147442218456929, 0.02209579691237847, 0.02272320155514091, 0.02335591266876233, 0.02399316240650096, 0.02463413979047206, 0.0252779923721015, 0.02592382829706794, 0.02657071878571696, 0.02721770103431949, 0.02786378153578254, 0.02850793981040361, 0.02914913252789695, 0.02978629799113182, 0.03041836093973557, 0.03104423761788577, 0.03166284103521637, 0.03227308633280317, 0.03287389614772222, 0.03346420584977994, 0.03404296850285232, 0.03460915938105266, 0.03516177984696595, 0.03569986037582099, 0.03622246248618773, 0.03672867931516963, 0.03721763455480307, 0.03768847944729448, 0.0381403875207771, 0.03857254673553339, 0.03898414870433443, 0.03937437465105041, 0.03974237778047385, 0.04008726175096892, 0.04040805497181029, 0.04070368049066337, 0.04097292129536803, 0.041214380929786, 0.04142643941763961, 0.04160720460253112, 0.04175445914797774, 0.0418656035992709, 0.04193794003300193, 0.04196984343544428, 0.04196005316010078, 0.04190738538045276, 0.04181074282295429, 0.04166912472925695, 0.04148163699732178, 0.04124750244422729, 0.0409660711263703, 0.04063683064547435, 0.04025941636140393, 0.03983362142528263, 0.03935940653898817, 0.03883690933974907, 0.03826645330148068, 0.03764855603778486, 0.03698393688528281, 0.03627352364040012, 0.03551845831794961, 0.03472010179609811, 0.03388003720975558, 0.03300007195322491, 0.03208223815341324, 0.03112879147715703, 0.03014220814052843, 0.02912517999461101, 0.0280806075713135, 0.02701159098463915, 0.02592141859756983, 0.02481355338258154, 0.02369161692495592, 0.02255937104253566, 0.0214206970235844, 0.02027957251585555, 0.01914004613489265, 0.01800620989783233, 0.016882169630308, 0.01577201353822874, 0.01467977918271737, 0.01360941914483424, 0.01256476571617748, 0.01154949500116795, 0.01056709086582733, 0.009620809214927445, 0.008713643123240328, 0.007848289385726542, 
0.007027117084249607, 0.006252138793048784, 0.005524985059893267, 0.004846882802712837, 0.004218638250642143, 0.003640625032118427, 0.003112777969166924, 0.002634593075011821, 0.002205134170481737, 0.001823046432676445, 0.001486577066891204, 0.001193603150157707, 0.000941666533112312, 0.0007280155079800732, 0.0005496527567961096, 0.0004033888891133153, 0.0002859006664768515, 0.0001937927972246595, 0.0001236619757281482, 7.216164201320763e-05, 3.606575889812669e-05, 1.232975295359373e-05, -1.853347426611363e-06, -9.003608523239226e-06, -1.131376007416918e-05, -1.061960972645326e-05, -8.383130727457333e-06, -5.689811783761434e-06, -3.261516315911095e-06, -1.485644385623708e-06, -4.608169887967972e-07, -5.860474730848275e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "angular_momentum": 2, + "j": 3 + } + ], + "beta_projectors": [ + { + "cutoff_radius": 0.7, + "radial_function": [ + -0.004236145440561064, -0.00428942961112893, -0.004343384014834632, -0.004398017082203044, -0.004453338593908482, -0.004509306574358133, -0.004566077516280165, -0.004623506539048121, -0.004681679197391345, -0.004740448848023788, -0.00480023353417019, -0.004860510454713245, -0.004921642715526012, -0.004983609109694903, -0.00504626212989317, -0.005109779115313249, -0.005174054805104425, -0.005239095515178103, -0.005305054705336902, -0.005371711197952812, -0.005439339333334271, -0.005507695818233265, -0.005576991086710901, -0.005647138181475066, -0.005718249456049566, -0.005790130966032115, -0.005862944607222348, -0.005936735436369453, -0.006011356256367044, -0.006086947173463231, -0.006163552668468078, -0.006241084291471529, -0.006319532043046849, -0.006399104333447828, -0.006479528550482358, -0.006561045969728668, -0.006643552702445296, -0.006727146952562102, -0.00681174378958069, -0.006897464228393646, -0.006984202764373328, -0.007072043549495578, -0.007160994471816639, -0.007251140110478815, -0.007342288719873501, -0.007434635060263648, -0.007528146057361112, -0.007622819117994966, -0.007718834759382375, -0.007815819520506286, -0.00791410909039732, -0.008013667589412967, -0.008114543213503703, -0.008216542425006296, -0.008319920120459949, -0.008424549673380426, -0.008530552642796403, -0.008637860281172173, -0.008746433160512754, -0.008856501772680058, -0.008967889681432584, -0.009080715659326149, -0.009194916874670294, -0.009310569634045017, -0.00942766117579989, -0.009546238523407509, -0.009666307990721696, -0.009787924893130457, -0.009911030385689924, -0.01003568931177266, 
-0.01016194068305897, -0.01028981099108623, -0.01041916924248075, -0.01055023221870854, -0.01068293376052835, -0.01081732653298022, -0.01095341729061567, -0.01109115959930707, -0.0112306796007507, -0.01137195779606627, -0.01151495854997843, -0.01165980899674475, -0.01180647069966782, -0.01195497576062387, -0.01210535645776115, -0.01225761548902835, -0.01241180533141809, -0.01256791640423196, -0.012726000247344, -0.01288604807090194, -0.01304816382711274, -0.0132122986046835, -0.01337846514892738, -0.01354678752589468, -0.01371712346666034, -0.01388968869538544, -0.01406438568565101, -0.01424126768701831, -0.01442043320621113, -0.01460180839043819, -0.01478547616721284, -0.01497147929649702, -0.01515974936451088, -0.0153504586927423, -0.01554352875764116, -0.01573905665922182, -0.01593699953678453, -0.01613746955421815, -0.01634044014300901, -0.01654597597811722, -0.01675408454699343, -0.01696482149148966, -0.01717821033281134, -0.01739427136740103, -0.01761309415021127, -0.01783460560305834, -0.01805894699405475, -0.01828608985481004, -0.01851608711250533, -0.018748991071083, -0.0189848091260804, -0.019223607516958, -0.01946540210715891, -0.0197102444357323, -0.01995816922616195, -0.02020919751293838, -0.02046338819222518, -0.02072078394533802, -0.02098141146528129, -0.021245326375823, -0.02151251537933201, -0.02178312588348378, -0.02205710407558004, -0.02233454046912095, -0.0226154456233864, -0.02289992468231796, -0.02318794122752173, -0.02347960719066087, -0.02377492454622769, -0.02407398299465886, -0.02437676051379472, -0.02468338278062278, -0.02499384355314992, -0.02530821306105247, -0.02562652911862485, -0.02594885239952791, -0.02627523447931884, -0.02660571148050346, -0.02694035430141329, -0.0272792003582978, -0.02762232249979168, -0.02796974793588942, -0.02832153144074638, -0.02867775447465261, -0.02903846281333789, -0.02940368593507181, -0.02977350648878316, -0.03014800734952384, -0.03052717955041924, -0.03091113527535089, -0.03129992916886727, 
-0.03169358977445058, -0.03209223193840142, -0.03249585322725061, -0.03290457077053914, -0.03331842956228073, -0.0337375003711388, -0.03416180376356175, -0.03459148706778432, -0.03502654184550776, -0.03546706975504374, -0.03591316347778352, -0.03636483997733696, -0.03682221278780288, -0.03728532469620439, -0.03775425635889918, -0.03822909882377855, -0.03870991266246952, -0.03919676510484812, -0.03968973560726494, -0.04018891681227663, -0.04069436918308111, -0.04120615760098, -0.04172440596186243, -0.04224915804216255, -0.04278052504224478, -0.04331855133425609, -0.04386335958343062, -0.04441499848508401, -0.04497359033578179, -0.04553918779964431, -0.04611191663667968, -0.04669183665189883, -0.04727904034339746, -0.04787363622227794, -0.04847570288321201, -0.04908533382203758, -0.04970264052765008, -0.05032770017980959, -0.0509606131372551, -0.05160149586921435, -0.05225042531217639, -0.05290750481942979, -0.05357286020477443, -0.05424657547018888, -0.05492875626545158, -0.05561950636595366, -0.05631894213469832, -0.05702716748810465, -0.05774429622873355, -0.05847044211689429, -0.05920570985272579, -0.05995022868338828, -0.06070408652696011, -0.06146743684124428, -0.06224036888455936, -0.06302301127512293, -0.06381550221437873, -0.0646179409669568, -0.06543046792930338, -0.06625320967669138, -0.06708628218471066, -0.0679298251884689, -0.06878396955366851, -0.06964883243746636, -0.0705245786300149, -0.07141131646790842, -0.07230920429371006, -0.07321836121213235, -0.07413894893044762, -0.07507109893438765, -0.07601495229961207, -0.0769706713225367, -0.07793838805062246, -0.07891826536393615, -0.07991044918427508, -0.08091509372500154, -0.08193235418525549, -0.08296239545452841, -0.08400536356089992, -0.08506143426981505, -0.08613076434280482, -0.08721351818842007, -0.0883098750884913, -0.08941999127962855, -0.09054404687770948, -0.0916822147284092, -0.0928346736527918, -0.09400159809680098, -0.0951831679355199, -0.09637956938490716, -0.09759098651149616, 
-0.0988176057736348, -0.1000596231659878, -0.1013172293262947, -0.1025906076885196, -0.1038799693894187, -0.1051855057580949, -0.1065074253109773, -0.1078459200703105, -0.1092012134676658, -0.1105735050906303, -0.1119630038233587, -0.1133699383492179, -0.1147945071191503, -0.116236945208063, -0.1176974644735999, -0.1191762984721638, -0.1206736766401081, -0.1221898184148564, -0.1237249672963133, -0.1252793562465602, -0.1268532249835747, -0.1284468191476416, -0.1300603739726055, -0.1316941522572035, -0.1333483929905662, -0.1350233563042197, -0.1367192988673313, -0.1384364773589764, -0.1401751607069723, -0.1419356145491215, -0.1437181031436048, -0.1455229105361269, -0.1473503033054298, -0.1492005653027092, -0.1510739776695176, -0.1529708337058616, -0.1548914156276801, -0.1568360158093682, -0.1588049400294641, -0.1607984829445567, -0.1628169490009379, -0.1648606478043149, -0.1669298879910875, -0.1690249852456269, -0.171146261931564, -0.1732940324062988, -0.175468634767157, -0.1776703913855306, -0.1798996335072498, -0.1821567066823174, -0.1844419471507066, -0.1867557059901051, -0.1890983271474707, -0.1914701679132997, -0.19387158863635, -0.1963029450258089, -0.1987646104174177, -0.2012569504938586, -0.2037803426726706, -0.2063351656242086, -0.208921803952909, -0.2115406434222028, -0.2141920774381301, -0.2168765054037385, -0.2195943201303545, -0.2223459365673793, -0.2251317591014948, -0.2279522108328866, -0.2308076989518905, -0.2336986580412517, -0.2366255135844972, -0.2395886960968439, -0.2425886518144601, -0.2456258126377563, -0.248700636833954, -0.2518135749151, -0.2549650827213689, -0.2581556204761045, -0.2613856654632279, -0.2646556788614756, -0.2679661459148879, -0.2713175496988454, -0.2747103722280491, -0.2781451138611684, -0.2816222669750806, -0.2851423394747343, -0.2887058372315049, -0.2923132735891342, -0.2959651684177983, -0.2996620461297146, -0.3034044364206787, -0.3071928766593848, -0.3110278998823698, -0.3149100568258349, -0.3188398980334555, 
-0.3228179771082609, -0.3268448581191925, -0.3309211044341751, -0.3350472911579613, -0.3392239939824884, -0.3434517971038699, -0.3477312841435941, -0.3520630544146375, -0.3564477029215642, -0.3608858351407242, -0.3653780567951299, -0.3699249862232536, -0.3745272421494381, -0.3791854483558165, -0.3839002348730556, -0.3886722369059059, -0.3935020941314937, -0.3983904547127835, -0.4033379642338635, -0.4083452792000958, -0.4134130622030562, -0.4185419739446786, -0.4237326882375445, -0.4289858744359933, -0.4343022153367468, -0.4396823915446462, -0.4451270911248985, -0.4506370051369167, -0.4562128310699173, -0.4618552673492878, -0.4675650195187112, -0.4733427940209608, -0.4791893014067471, -0.485105257803183, -0.4910913796104434, -0.4971483892106828, -0.5032770116884229, -0.5094779716949415, -0.5157519998170308, -0.5220998281210335, -0.5285221904468923, -0.5350198226127827, -0.5415934607841232, -0.5482438454909, -0.5549717161199464, -0.561777812113503, -0.5686628771427713, -0.575627649819335, -0.5826728744131727, -0.5897992880971403, -0.5970076325145829, -0.604298644636174, -0.6116730636079476, -0.6191316204135819, -0.6266750509426032, -0.6343040808001654, -0.6420194368040851, -0.6498218393463762, -0.6577120049725215, -0.6656906451062045, -0.6737584647672745, -0.681916163854999, -0.6901644329035924, -0.6985039567737168, -0.7069354133668648, -0.7154594650777591, -0.724076771266955, -0.7327879769900495, -0.7415937161248513, -0.750494610643085, -0.7594912694499211, -0.7685842853187295, -0.7777742377620537, -0.7870616883601395, -0.7964471819506809, -0.8059312459605597, -0.8155143853158365, -0.8251970878072276, -0.834979813940913, -0.8448630065325636, -0.8548470787381817, -0.8649324202055112, -0.8751193922935004, -0.885408326012888, -0.8957995223273615, -0.9062932506447836, -0.9168897435974536, -0.9275892005488311, -0.9383917810075811, -0.9492976052080281, -0.9603067519313133, -0.9714192559697599, -0.9826351048919895, -0.9939542403564653, -1.005376550774666, 
-1.01690187317889, -1.028529987829111, -1.040260619723157, -1.052093427748318, -1.064028013237769, -1.076063906030839, -1.088200569195843, -1.100437393139942, -1.112773691806122, -1.125208698727327, -1.13774156812549, -1.150371365192035, -1.163097066075528, -1.175917553502929, -1.188831613046787, -1.201837927764936, -1.214935075135169, -1.228121522386664, -1.241395621822914, -1.254755606219192, -1.268199584183176, -1.28172553522576, -1.295331304008749, -1.309014595043984, -1.322772969311359, -1.336603834835119, -1.350504443983895, -1.364471886360564, -1.378503082317761, -1.392594776096957, -1.406743531933217, -1.420945723344046, -1.435197529051667, -1.449494923943116, -1.463833673610692, -1.478209324683938, -1.49261719833648, -1.507052381090088, -1.521509718395932, -1.535983804166329, -1.550468972089656, -1.564959288534017, -1.579448541613569, -1.59393023259524, -1.608397564861754, -1.622843435854429, -1.637260426251707, -1.651640788437913, -1.665976437969561, -1.680258940569782, -1.6944795028339, -1.708628959832869, -1.722697763332492, -1.736675971424429, -1.750553235220792, -1.764318786539725, -1.777961427137548, -1.791469514279057, -1.80483094901277, -1.818033162607655, -1.831063104108137, -1.843907226723249, -1.856551473714347, -1.868981267022602, -1.881181490320528, -1.893136479210887, -1.904830003848314, -1.916245258520679, -1.927364844545376, -1.938170759543713, -1.948644382072583, -1.958766458295032, -1.968517089170284, -1.977875716958423, -1.986821111140621, -1.995331358266747, -2.003383846668219, -2.010955256567473, -2.018021547460498, -2.024557947874992, -2.030538944038422, -2.035938271163131, -2.040728903436771, -2.044883047231798, -2.048372131852061, -2.05116680617597, -2.053236930331851, -2.054551575059572, -2.055079017105742, -2.054786739377767, -2.053641431246697, -2.051608991672997, -2.048654532742149, -2.044742386845436, -2.039836115831857, -2.033898521387676, -2.026891659659925, -2.018776857388485, -2.00951473273841, -1.999065216800639, 
-1.987387581656444, -1.974440469354269, -1.960181926889625, -1.944569445027283, -1.927559999861978, -1.909110102300095, -1.889175849695591, -1.867712984649344, -1.844676959359929, -1.820023004320098, -1.793706207020211, -1.765681592363861, -1.735904215298983, -1.704329257223276, -1.670912131516685, -1.63560859818125, -1.598374884728806, -1.559167818820089, -1.517944968412311, -1.474664791855282, -1.429286798631207, -1.38177172129218, -1.332081695925982, -1.280180456913911, -1.226033541152128, -1.169608505259668, -1.110875154702681, -1.049805785822857, -0.9863754407531564, -0.9205621747596786, -0.8523473380189253, -0.781715869761062, -0.7086566068891331, -0.6331626051817056, -0.555231474748255, -0.474865728443536, -0.3920731438964959, -0.3068671378768384, -0.2192671532115207, -0.1292990576063935, -0.03699555386777398, 0.05760339981189011, 0.154450160607011, 0.2534889625898162, 0.3546554728745055, 0.4578763415063283, 0.5630687487514137, 0.6701399505246743, 0.7789868253687744, 0.889495425203882, 1.001540532125171, 1.114985225497066, 1.229680463053038, 1.345464678209118, 1.462163400485117, 1.579588901699796, 1.697539873961404, 1.815801145805239, 1.934143440311514, 2.052323184683768, 2.170082375632647, 2.287148509671203, 2.403234585637567, 2.5180391881847, 2.631246660561325, 2.742527376962445, 2.85153812314824, 2.957922596795342, 3.061312036693904, 3.161325993361668, 3.257573250277243, 3.349652909119815, 3.437155648548726, 3.519665169257007, 3.596759836147758, 3.668014528023581, 3.733002706648693, 3.791298713651267, 3.842480305615743, 3.886131434629366, 3.921845281498173, 3.949227546247316, 3.967900000056456, 3.97750429857956, 3.977706055984622, 3.96819917465992, 3.948710422630255, 3.919004247105888, 3.878887807968711, 3.828216210031274, 3.766897908421353, 3.694900254482079, 3.612255144677755, 3.519064726731271, 3.415507111568573, 3.301842030227068, 3.178416368409473, 3.045669501590171, 2.904138345763152, 2.754462029059302, 2.597386081745728, 2.43376603136804, 
2.264570282942931, 2.090882154253252, 1.913900929313071, 1.734941786147625, 1.555434448911336, 1.376920410428409, 1.20104856856616, 1.029569119116133, 0.8643255504929825, 0.7072445898717372, 0.5603239592089531, 0.4256178120026625, 0.3052197317653963, 0.2012432527667189, 0.1157993255195522, 0.05097565370468697, 0.008769705606640995, -0.008556295962291932, -0.002247486482306559, 0.0002442612477649972, -1.440063680524434e-05, 1.065910399878816e-05, 1.164363213937573e-05, 1.476790489450894e-05, 1.595716402653931e-05, 1.660534395830551e-05, 1.718052277095154e-05, 1.768820883895906e-05, 1.813074707962619e-05, 1.851076287778087e-05 + ], + "ultrasoft_cutoff_radius": 1.2, + "angular_momentum": 0, + "label": "1S" + }, + { + "cutoff_radius": 0.7, + "radial_function": [ + 0.02275752590168654, 0.02304378043537399, 0.02333363561154625, 0.02362713672085187, 0.02392435448695847, 0.02422518331037957, 0.02452996947867616, 0.02483856456820039, 0.02515095277333674, 0.02546705733863531, 0.025787761617317, 0.02611194549358687, 0.02644035411862119, 0.02677299639447492, 0.0271097420238746, 0.02745084466468298, 0.02779611295313406, 0.02814565472191767, 0.02849989108511643, 0.02885814404231724, 0.02922132254982244, 0.02958874075384335, 0.02996094989034211, 0.03033775906091779, 0.0307196003456028, 0.03110582331373368, 0.03149707529992228, 0.03189340257047121, 0.03229446566579632, 0.032700558632603, 0.03311198932538503, 0.03352847251244981, 0.03395017946473741, 0.03437734573108631, 0.03480953341458237, 0.03524751798842923, 0.03569078350355492, 0.03613981447036719, 0.0365942954595825, 0.03705467959673507, 0.03752071220023333, 0.03799268847396408, 0.03847064783093941, 0.03895459843665787, 0.03944447169380601, 0.03994065389114509, 0.04044296983980107, 0.04095168374301818, 0.0414670975678023, 0.04198843224153887, 0.04251649706637901, 0.04305134245975831, 0.04359303849844781, 0.04414117519339433, 0.04469644745897856, 0.04525863197965355, 0.04582801652579794, 0.0464045031414576, 
0.04698794546766033, 0.04757908911903837, 0.04817759689776227, 0.04878361260508048, 0.04939721990619939, 0.05001851910235025, 0.05064762801007003, 0.05128469470317861, 0.05192973666008735, 0.05258301157807757, 0.05324437658141541, 0.05391409762297408, 0.05459229005582687, 0.05527910267186935, 0.05597426098804487, 0.05667829618919075, 0.05739124055575098, 0.05811316339033352, 0.05884424427125337, 0.05958431431620139, 0.06033379102918583, 0.06109277752210676, 0.06186109178198251, 0.06263925011482939, 0.0634271392406527, 0.0642249238483506, 0.06503281099381315, 0.06585075917197361, 0.06667911153187323, 0.0675178156011808, 0.06836700425096347, 0.06922695903909369, 0.07009778862562902, 0.07097951871100795, 0.07187229617361515, 0.07277640714318637, 0.07369166232147613, 0.07461864576355288, 0.07555720644468983, 0.07650754552519179, 0.07746991616774618, 0.0784443738125495, 0.07943109660115501, 0.08043023603141732, 0.08144183611018727, 0.08246625091635489, 0.08350352707923578, 0.0845539192565745, 0.08561737874971863, 0.08669435225285917, 0.08778473539438197, 0.08888896594604435, 0.09000699199258672, 0.09113911481645814, 0.09228552541999199, 0.09344626685229299, 0.09462174029012399, 0.09581185324206055, 0.09701702104708224, 0.09823735553974376, 0.0994729184842569, 0.1007241578388861, 0.1019910693407672, 0.1032739458662475, 0.1045729476822164, 0.1058882654636442, 0.1072201775583393, 0.108568798301883, 0.1099343916347693, 0.1113171924356005, 0.1127173252557471, 0.1141351503030935, 0.1155706585152638, 0.1170243755379416, 0.1184963280819115, 0.1199867453125466, 0.1214959368691523, 0.1230241617222119, 0.1245715430227505, 0.1261384100809985, 0.1277249905450697, 0.1293315577214312, 0.130958254187269, 0.1326054620836254, 0.1342733868891967, 0.1359622677366201, 0.13767236639955, 0.1394040318305331, 0.1411574346460033, 0.142932910967401, 0.1447306944285836, 0.1465511106241015, 0.1483944400380255, 0.150260921917598, 0.1521508782464027, 0.1540645979793012, 0.1560024550183459, 
0.1579645807016861, 0.159951411644256, 0.161963319193113, 0.1640004083448272, 0.1660631665143727, 0.1681518741322258, 0.1702668279538418, 0.1724084355420474, 0.1745768819351302, 0.1767726896149911, 0.1789960641935349, 0.181247458908166, 0.1835270553208917, 0.1858354202307116, 0.1881727751280488, 0.1905394842724601, 0.1929360414397978, 0.1953626825487833, 0.1978198527517984, 0.200307941592078, 0.2028272550271114, 0.205378317915247, 0.2079614573528018, 0.2105770365721508, 0.2132255479738975, 0.2159073426143494, 0.2186228863846854, 0.2213725159525609, 0.2241567596591409, 0.2269760381215072, 0.2298307576171569, 0.2327213441372569, 0.2356483317917384, 0.2386120402746573, 0.2416131095644202, 0.2446518163755869, 0.2477288240285733, 0.2508444820977349, 0.2539992825365819, 0.2571938111089374, 0.2604284630721253, 0.263703786496705, 0.2670203231370159, 0.2703785260126932, 0.2737789726473407, 0.2772221716879187, 0.2807086576632883, 0.2842389463292018, 0.287813648489236, 0.2914333211636033, 0.2950984640836056, 0.2988096781864681, 0.3025675674793625, 0.3063726697547833, 0.3102256468188943, 0.3141270475575073, 0.3180774718439525, 0.3220776081597759, 0.3261279552824988, 0.3302292919434125, 0.3343821304092495, 0.3385871693885459, 0.342845100430301, 0.3471565151683215, 0.3515221545601341, 0.3559426542396611, 0.360418706220726, 0.3649510318669776, 0.3695403030384514, 0.3741872501693251, 0.378892611704113, 0.3836571023932508, 0.3884814898556133, 0.3933664639043013, 0.3983128710404717, 0.4033214084829241, 0.4083928830728197, 0.4135281070028949, 0.4187278351343768, 0.4239929170609051, 0.4293241436701919, 0.4347223646748215, 0.4401884054788527, 0.4457231188472607, 0.4513273628158315, 0.4570020128819444, 0.4627479480521876, 0.4685660690277333, 0.4744572786140732, 0.4804224911782919, 0.486462633240733, 0.492578636808376, 0.4987714706511581, 0.5050420764876291, 0.5113914417582006, 0.5178205347348257, 0.5243303657735462, 0.5309219477301158, 0.5375963079802647, 0.5443544834675704, 
0.5511974890669621, 0.5581264306615408, 0.5651423531772314, 0.5722463710063777, 0.5794395446420904, 0.5867230403191575, 0.5940979548979435, 0.6015654241482578, 0.609126638867832, 0.6167827302763793, 0.6245349089331341, 0.6323843639256641, 0.6403323245657923, 0.6483800193211849, 0.6565286693368309, 0.6647795575424164, 0.6731339514872704, 0.6815931399224925, 0.6901584525471076, 0.6988311716952811, 0.7076126721792952, 0.7165042968888767, 0.725507414483597, 0.7346234228891889, 0.7438537170721202, 0.7531997340972124, 0.7626629060723854, 0.7722446865077895, 0.7819465684968991, 0.7917700360050317, 0.8017165870914451, 0.8117877720557636, 0.8219851466341674, 0.8323102562503067, 0.8427646917613854, 0.8533500699564962, 0.8640680157295424, 0.8749201647199274, 0.8859081923551841, 0.8970337780732288, 0.9082986199375913, 0.9197044623253381, 0.9312530235933965, 0.9429461064780107, 0.9547854814529153, 0.9667729417180204, 0.9789103528559946, 0.9911995418968554, 1.003642397470551, 1.016240813483483, 1.028996707521607, 1.04191203366808, 1.054988739096723, 1.068228834140196, 1.081634315127163, 1.095207226600196, 1.10894962915955, 1.12286361279695, 1.136951269682836, 1.151214753408089, 1.165656213271631, 1.180277821952536, 1.195081809951369, 1.210070390540429, 1.225245857195088, 1.240610452366267, 1.25616652415661, 1.271916403179852, 1.28786244308231, 1.304007059394141, 1.320352648120613, 1.336901674792992, 1.353656617337847, 1.370619973527105, 1.38779426225066, 1.405182078478514, 1.422785973018005, 1.440608589560671, 1.458652573025592, 1.476920585484302, 1.495415353273279, 1.514139595422269, 1.533096091730044, 1.552287631611256, 1.571717038245287, 1.591387174629227, 1.611300926636164, 1.631461218246779, 1.651871002141798, 1.672533243626928, 1.693450970521627, 1.714627227387832, 1.736065087038265, 1.757767658362323, 1.779738083223221, 1.801979537076292, 1.824495225401322, 1.847288389691786, 1.870362289730824, 1.893720244153303, 1.917365581478591, 1.941301680168763, 1.965531925978121, 
1.990059774765293, 2.014888686521023, 2.040022165445937, 2.065463747572516, 2.09121699984499, 2.11728552917802, 2.143672971112597, 2.170382991094969, 2.197419289425469, 2.224785608944734, 2.252485710696538, 2.280523400642303, 2.308902508486788, 2.337626904100984, 2.36670048384515, 2.396127178255417, 2.425910947013682, 2.456055792693097, 2.486565731656549, 2.517444830019129, 2.548697170543178, 2.5803268678931, 2.612338073685727, 2.644734964315697, 2.677521743988492, 2.710702653698821, 2.744281948540101, 2.778263920383738, 2.812652889913314, 2.847453196125588, 2.882669207324388, 2.918305312969742, 2.954365932265018, 2.990855502623013, 3.027778482156324, 3.065139356432812, 3.102942621295366, 3.141192800222998, 3.179894424729588, 3.21905205053689, 3.25867024100188, 3.298753583986586, 3.339306662337218, 3.380334089901317, 3.421840470783747, 3.463830426315717, 3.506308577604555, 3.549279554256923, 3.592747982651876, 3.636718489100959, 3.681195700728839, 3.726184232251234, 3.77168869434085, 3.817713696358801, 3.864263812242374, 3.911343622267858, 3.958957675856068, 4.007110503160251, 4.055806611062637, 4.105050479273147, 4.154846547056083, 4.205199230873941, 4.256112895405769, 4.307591869173586, 4.359640436327759, 4.412262816703899, 4.465463191276378, 4.519245658464397, 4.573614273239199, 4.628573001733734, 4.684125741640755, 4.740276309884674, 4.797028427868527, 4.854385731412242, 4.912351751627681, 4.970929912212436, 5.03012352669852, 5.089935783717699, 5.15036974588823, 5.211428339875215, 5.273114349104664, 5.335430401425759, 5.398378968470315, 5.461962349332691, 5.526182663468807, 5.591041842484463, 5.656541623471904, 5.722683521781762, 5.789468848240889, 5.856898669723313, 5.924973815419004, 5.993694861637608, 6.063062111739272, 6.133075588075144, 6.203735025183144, 6.275039843039963, 6.346989139392696, 6.419581674828278, 6.492815859759342, 6.566689729353492, 6.641200935647454, 6.716346726318691, 6.792123926149145, 6.868528921788061, 6.945557638770956, 
7.023205525423441, 7.101467528302152, 7.18033807213825, 7.259811044315107, 7.339879759682378, 7.420536948138199, 7.501774725941539, 7.583584571234473, 7.665957296029893, 7.748883027375381, 7.832351168396499, 7.91635038066129, 8.000868546303437, 8.085892746304118, 8.171409222356056, 8.25740334924161, 8.343859598932346, 8.430761511360728, 8.518091654982753, 8.60583159048607, 8.69396184027246, 8.782461844253293, 8.871309924274396, 8.960483241052307, 9.049957757107737, 9.13970819315602, 9.229707983235624, 9.31992923344162, 9.410342674505172, 9.500917616385026, 9.591621901187654, 9.68242185245676, 9.773282228754386, 9.864166171646758, 9.955035151255128, 10.04584891960566, 10.13656545149982, 10.22714089192729, 10.31752949923504, 10.40768359154151, 10.49755348562625, 10.58708743941725, 10.67623159638913, 10.76492991867323, 10.85312413477049, 10.94075367128667, 11.02775559732176, 11.11406455549554, 11.19961270697922, 11.28432966412507, 11.36814242812674, 11.45097532826767, 11.53274995792758, 11.61338511120994, 11.69279672529423, 11.77089781403772, 11.8475984108294, 11.92280550939463, 11.9964230051893, 12.06835163809561, 12.13848893998927, 12.20672917944976, 12.27296331525368, 12.33707894402921, 12.39896026300623, 12.45848801929082, 12.51553948279428, 12.56998840667784, 12.62170500108129, 12.67055591078067, 12.71640419770334, 12.75910932832263, 12.798527170412, 12.8345099979269, 12.86690649786061, 12.89556179474899, 12.9203174749429, 12.94101163248736, 12.95747891255249, 12.96955057904284, 12.97705458626174, 12.97981566928145, 12.97765544703469, 12.97039253914429, 12.957842703664, 12.93981898820475, 12.91613190229489, 12.88658961100016, 12.85099814368953, 12.80916163621424, 12.76088258337246, 12.70596212934018, 12.64420037586475, 12.57539672193333, 12.49935023440536, 12.41586004420682, 12.32472578250786, 12.22574804499876, 12.11872889320553, 12.00347239392376, 11.8797851990218, 11.74747715875459, 11.60636198610768, 11.45625795709285, 11.29698865949898, 11.1283837862604, 
10.95027997761112, 10.76252171163995, 10.56496224310727, 10.35746459631484, 10.13990260755568, 9.912162021285667, 9.674141638987429, 9.425754520971026, 9.166929242998794, 8.897611205600079, 8.617763995560372, 8.327370799375064, 8.026435867090854, 7.714986025558114, 7.393072237040412, 7.060771200769015, 6.718186996896615, 6.365452762867991, 6.002732403994669, 5.630222327480061, 5.24815319617429, 4.85679169451197, 4.456442295099852, 4.047449022947221, 3.630197200100654, 3.205115161538231, 2.772675932265801, 2.333398845131675, 1.88785108927718, 1.43664916895332, 0.9804602527115769, 0.5200033980612222, 0.05605062122699866, -0.4105722036348345, -0.8789846473872513, -1.348251254746149, -1.817381330131089, -2.285329010320873, -2.750993656572649, -3.213220600700549, -3.670802281765115, -4.122479807941289, -4.566944986724582, -5.002842857102809, -5.428774771301252, -5.843302060733328, -6.244950332161063, -6.632214434374142, -7.003564133274952, -7.357450540568164, -7.692313327470055, -8.00658876469651, -8.298718616838526, -8.567159921451987, -8.810395674961715, -9.026946443752369, -9.215382908297295, -9.374339344364838, -9.502528030362061, -9.598754562965148, -9.661934048148817, -9.691108120772217, -9.685462730294944, -9.644346612055607, -9.567290343921144, -9.454025868105377, -9.304506332014183, -9.11892608186743, -8.897740610570382, -8.641686239620796, -8.351799280502648, -8.02943439424202, -7.676281834682522, -7.29438323081729, -6.886145531728259, -6.45435270682505, -6.002174764162387, -5.533173623973322, -5.051305356013245, -4.560918272978658, -4.066746351404754, -3.573897444295717, -3.087835745128932, -2.614357967328571, -2.159562720450429, -1.729812588487668, -1.331688457515993, -0.971935680060071, -0.6574018677962227, -0.3949648685581716, -0.1914624641883403, -0.0535103483248966, 0.01168950271511357, 0.005255590542294613, -0.0006060192863918614, 5.473724491625235e-06, -5.113616814124558e-05, -5.153482750415373e-05, -5.71358903269891e-05, -5.811039705626238e-05, 
-5.782760203387218e-05, -5.746072418136504e-05, -5.702081311038419e-05, -5.651148076751106e-05, -5.593699225491955e-05 + ], + "ultrasoft_cutoff_radius": 1.2, + "angular_momentum": 0, + "label": "2S" + }, + { + "cutoff_radius": 2.0, + "radial_function": [ + -2.293354347213315e-05, -2.351410877306893e-05, -2.410937114861742e-05, -2.471970265668855e-05, -2.534548477386759e-05, -2.598710863384895e-05, -2.664497527190642e-05, -2.731949587555188e-05, -2.80110920415395e-05, -2.872019603937611e-05, -2.9447251081502e-05, -3.019271160031161e-05, -3.095704353218685e-05, -3.174072460872073e-05, -3.25442446553135e-05, -3.336810589732716e-05, -3.421282327399078e-05, -3.507892476025217e-05, -3.596695169677696e-05, -3.687745912830207e-05, -3.781101615055382e-05, -3.876820626594887e-05, -3.974962774829938e-05, -4.075589401675073e-05, -4.178763401918547e-05, -4.284549262533277e-05, -4.393013102982971e-05, -4.504222716548594e-05, -4.618247612700984e-05, -4.735159060546173e-05, -4.855030133370437e-05, -4.977935754313078e-05, -5.10395274319535e-05, -5.233159864534862e-05, -5.365637876775496e-05, -5.501469582763479e-05, -5.64073988150132e-05, -5.783535821211873e-05, -5.929946653745663e-05, -6.080063890365593e-05, -6.233981358943733e-05, -6.391795262606084e-05, -6.553604239861933e-05, -6.719509426255296e-05, -6.889614517577166e-05, -7.064025834677771e-05, -7.242852389919733e-05, -7.426205955313323e-05, -7.61420113237661e-05, -7.806955423764082e-05, -8.00458930670845e-05, -8.207226308321705e-05, -8.414993082802304e-05, -8.628019490596875e-05, -8.846438679565875e-05, -9.070387168203825e-05, -9.300004930966323e-05, -9.535435485757056e-05, -9.776825983629421e-05, -0.0001002432730075906, -0.000102780941327444, -0.0001053828509129456, -0.0001080506280336476, -0.0001107859401280126, -0.0001133536544804121, -0.0001162301337514545, -0.0001191769189250315, -0.0001223343900367638, -0.0001254322801558334, -0.0001286107533740036, -0.0001318664496517679, -0.000135210538252164, -0.0001386338733080921, 
-0.0001421478494790862, -0.0001457481089339407, -0.0001494407074178811, -0.0001532256600348974, -0.0001571070484144561, -0.0001610909282321275, -0.0001651682304041698, -0.0001693537965289757, -0.0001736415008941976, -0.0001780414059835092, -0.0001825515169172185, -0.0001871731747991259, -0.0001919144046400989, -0.0001967762784422778, -0.0002017596327047033, -0.0002068718140143039, -0.0002121101311135316, -0.0002174813327254373, -0.0002229913590102921, -0.0002286389649385328, -0.0002344280102051393, -0.0002403645576579247, -0.0002464531367645446, -0.0002526940532826591, -0.0002590938142526443, -0.0002656588170061288, -0.0002723813456550908, -0.0002792836115468302, -0.0002863549758346643, -0.0002936051385414172, -0.0003010445040317402, -0.0003086645875316066, -0.000316482669633418, -0.0003244968547016854, -0.0003327118548344132, -0.0003411409506139195, -0.0003497806917193116, -0.0003586338698375265, -0.0003677163419158372, -0.000377028105842196, -0.00038657646411827, -0.0003963647076638799, -0.0004064018611085056, -0.0004166913774391555, -0.0004272417491914082, -0.0004380643355557895, -0.0004491516818155182, -0.0004605268916460661, -0.0004721853311606161, -0.0004841432472214315, -0.0004964020639086693, -0.0005089690646023087, -0.0005218579858731773, -0.0005350717746523354, -0.0005486180882103703, -0.0005625116085987524, -0.0005767549331656952, -0.0005913542895786145, -0.0006063307317058161, -0.0006216763858970641, -0.0006374245671981459, -0.0006535608584676244, -0.0006701086687812729, -0.0006870745326931028, -0.0007044689666941795, -0.0007223077271270782, -0.0007405937303642446, -0.0007593440017363718, -0.0007785717648666864, -0.0007982832295848209, -0.0008184952456642427, -0.0008392155550933293, -0.0008604649254200762, -0.0008822472593648949, -0.0009045863917764663, -0.0009274892703719753, -0.0009509715252868187, -0.0009750442881595663, -0.0009997348125918523, -0.001025042345246858, -0.00105099546553491, -0.001077603289582377, -0.001104884061964799, 
-0.001132858327061771, -0.001161538156664138, -0.00119094737275051, -0.001221097336380834, -0.001252011566368765, -0.001283710233223267, -0.001316207024030796, -0.001349534955725021, -0.001383694826524604, -0.001418728567049111, -0.00145464507786804, -0.001491470529271361, -0.001529233022433478, -0.001567944522628502, -0.001607641039548402, -0.001648341581387376, -0.001690069594874378, -0.001732859929376648, -0.001776725166325124, -0.001821708646084875, -0.001867824366806655, -0.001915110728315213, -0.001963595869747362, -0.002013302346285733, -0.002064278153342013, -0.002116530646922321, -0.002170114443078148, -0.002225056359583452, -0.002281379124029794, -0.002339139588944612, -0.002398355413280448, -0.002459070159686258, -0.002521327009611129, -0.002585150716268847, -0.00265060005902545, -0.002717698547211776, -0.002786497286341914, -0.002857042245500369, -0.002929366440088111, -0.003003527670982843, -0.003079561272061093, -0.003157520393125767, -0.003237457264216289, -0.003319410597521708, -0.003403444318834146, -0.003489600477030174, -0.003577939710213749, -0.003668521105411582, -0.003761381976253398, -0.003856606343111172, -0.003954237514809303, -0.004054336910500332, -0.004156976808907392, -0.004262206366864968, -0.004370103864943626, -0.004480732018440538, -0.004594161711601785, -0.004710463263651954, -0.0048297013348024, -0.004951970163591719, -0.005077323551380122, -0.005205854630304264, -0.005337644001917682, -0.005472757926442194, -0.005611301015403562, -0.005753346340841723, -0.005898986742330742, -0.006048320547932065, -0.006201426387802324, -0.006358413451533267, -0.006519369358011571, -0.006684401290845534, -0.006853617561544843, -0.007027105072413997, -0.007204992085265106, -0.007387375523070002, -0.007574380551016934, -0.007766121254428443, -0.007962707101480853, -0.008164275671754618, -0.00837094469913433, -0.008582841202141684, -0.008800108075688385, -0.009022865950590896, -0.009251270770325908, -0.009485451796171948, -0.009725560128723214, 
-0.009971748503156946, -0.01022416471705761, -0.01048297443269635, -0.01074833387063467, -0.01102040378546662, -0.01129936685877012, -0.01158538532256993, -0.01187864911413467, -0.01217932995638171, -0.01248762436330999, -0.01280372080027851, -0.01312781751703416, -0.01346011626939334, -0.01380082628926951, -0.01415015963045809, -0.01450833465386657, -0.0148755716263117, -0.01525210693982246, -0.01563817174254623, -0.01603400704511326, -0.01643985898526214, -0.01685598007813042, -0.01728263842919077, -0.01772009115139205, -0.01816861442442488, -0.01862849024723726, -0.01910000300433124, -0.01958345006393706, -0.02007913122208066, -0.02058735843520003, -0.02110844478227846, -0.02164271648975902, -0.02219051170810843, -0.0227521706609543, -0.02332803831370148, -0.02391848450045034, -0.02452386283875728, -0.02514456894488084, -0.0257809792910508, -0.02643349127348282, -0.02710251957218859, -0.0277884711244516, -0.02849178702615776, -0.0292128945978271, -0.029952248727228, -0.0307103130659034, -0.03148755539410914, -0.03228446847331599, -0.03310153850680083, -0.03393928536472192, -0.03479822933555494, -0.03567890179605181, -0.03658185765499024, -0.03750765927984462, -0.03845688436375753, -0.039430124084188, -0.04042798260860152, -0.04145108792352337, -0.04250007881687771, -0.04357560376344346, -0.04467833527480667, -0.0458089630080762, -0.04696819401147064, -0.04815674932113754, -0.04937536689697753, -0.05062481011813084, -0.05190585943342962, -0.05321931045128885, -0.05456598520081815, -0.05594671336891385, -0.05736240686798048, -0.05881383256292415, -0.06030192099066442, -0.06182788361468272, -0.06339212871105077, -0.06499623700301642, -0.06664062311424708, -0.06832680348047347, -0.07005547540208022, -0.07182804984226529, -0.0736452093012551, -0.07550846791940961, -0.07741888817461863, -0.07937729571369811, -0.08138567996363563, -0.08344445691968436, -0.08555560935358447, -0.08771979113292033, -0.08993892524790002, -0.09221399745547258, -0.09454672722469662, 
-0.0969383397381683, -0.09939042249664978, -0.1019043304776475, -0.1044821420046272, -0.1071246921184602, -0.1098343901832295, -0.1126121298613504, -0.1154605317688003, -0.1183805344345062, -0.1213745309012886, -0.1244441867988013, -0.1275911390725958, -0.1308178215939968, -0.1341261064770192, -0.1375177441611305, -0.140995026664566, -0.1445603461122608, -0.148215545880827, -0.1519631975646409, -0.1558052581476278, -0.1597446457603485, -0.1637832335643437, -0.1679239702533611, -0.1721692441075726, -0.1765213225871993, -0.1809838919904188, -0.1855585733499972, -0.1902489409563062, -0.1950577328906008, -0.1999876097508491, -0.2050423297977683, -0.2102242355140448, -0.2155367932547008, -0.2209839211555237, -0.2265677249225165, -0.2322929155348409, -0.2381623709282598, -0.2441794876914557, -0.2503488104350007, -0.2566736765407926, -0.2631575423783593, -0.2698050215520205, -0.2766204122386143, -0.2836072968625574, -0.2907701501150639, -0.2981135263478161, -0.3056419432221901, -0.313360255674825, -0.3212724136256314, -0.3293844637851442, -0.3377001403435803, -0.3462258186975402, -0.3549657172775201, -0.363925507575686, -0.37311131606298, -0.3825274182202016, -0.392181533541724, -0.4020771271821846, -0.4122231298997299, -0.4226229662590339, -0.433285041898153, -0.4442147716670123, -0.4554194482919858, -0.4669053326778544, -0.4786802340447442, -0.4907506921589773, -0.5031242196379436, -0.5158085279208275, -0.5288111787885091, -0.5421401855135137, -0.5558039290317379, -0.5698103579306562, -0.5841672845276342, -0.5988855392840086, -0.61397193354927, -0.6294365868016161, -0.6452888943597109, -0.6615380013556811, -0.6781945117253108, -0.695267625706867, -0.7127683126133237, -0.7307067696351371, -0.7490939778225639, -0.7679408816436267, -0.7872593310206063, -0.8070601305822779, -0.8273561380156028, -0.84815824796136, -0.8694804963302325, -0.8913342619316041, -0.9137337574344868, -0.93669143121397, -0.960221912630818, -0.9843380985762811, -1.009055574896047, -1.034387907076547, 
-1.060351560586763, -1.086960285378956, -1.114231003967001, -1.142179762623134, -1.170822261012509, -1.200176509547146, -1.230259030442605, -1.261087804324239, -1.292680913728853, -1.325056995254029, -1.358234730679609, -1.392233719868503, -1.427073766143465, -1.462774992391698, -1.49935872645762, -1.53684537774687, -1.575257163406132, -1.614616154825814, -1.654945070628188, -1.696267118249368, -1.738606029805092, -1.781985968413131, -1.826431903835192, -1.87196897993146, -1.918623292406328, -1.966421049427359, -2.015389695569182, -2.065556503454547, -2.116949814017149, -2.16959858893378, -2.223532083810303, -2.278780732732087, -2.335374658561586, -2.393346057903463, -2.452726248143311, -2.513548374464278, -2.575845530343575, -2.639652123110421, -2.705002604977576, -2.771932634267331, -2.840478341173454, -2.91067673165016, -2.982565353158248, -3.056182754797311, -3.131567915735659, -3.208760783190746, -3.287802273341164, -3.368733549880091, -3.451596900658016, -3.536435650279978, -3.623293188498619, -3.712214468167101, -3.803244766950189, -3.89643041264211, -3.991818394217829, -4.08945659665337, -4.189393802553832, -4.29167944076697, -4.396363831456781, -4.503498231737682, -4.613134572452416, -4.725325640189993, -4.840125051098026, -4.95758732506213, -5.077767516498668, -5.200721846278146, -5.326507000192861, -5.455180578523694, -5.586801066537744, -5.721427421411764, -5.859119555143514, -5.999938080230123, -6.143944118691135, -6.291199835914974, -6.441767562918296, -6.595710736131988, -6.753093022426548, -6.913978846983431, -7.078433127868176, -7.246521194114369, -7.418308911709309, -7.593862556449904, -7.773248604586795, -7.956534049671273, -8.143785917164035, -8.335071606190583, -8.530458449629183, -8.730013986299333, -8.933805583621652, -9.141900603844237, -9.354366142620252, -9.571269068503023, -9.79267580878334, -10.01865231738596, -10.24926394596293, -10.48457538406166, -10.72465036538478, -10.9695517709207, -11.21934128054836, -11.47407937201494, 
-11.73382511192882, -11.99863588876828, -12.26856749801721, -12.54367365143185, -12.82400604290921, -13.10961395954756, -13.40054419305402, -13.69684073666149, -13.99854457744545, -14.30569348829406, -14.61832168571718, -14.9364595839197, -15.26013362673543, -15.58936575507146, -15.92417334173798, -16.26456864076794, -16.61055868177981, -16.96214465335932, -17.31932176218375, -17.6820786929758, -18.05039729778322, -18.42425207338358, -18.80360984630233, -19.18842922673349, -19.57866017497133, -19.97424346969294, -20.37511030710496, -20.78118160527712, -21.19236763757835, -21.60856731483278, -22.02966776238524, -22.45554352668125, -22.88605616601647, -23.32105344397638, -23.76036877808339, -24.2038205393305, -24.65121133421472, -25.10232735133027, -25.55693759325759, -26.01479319564796, -26.4756265949532, -26.93915087227114, -27.40505887072074, -27.87302248710155, -28.34269184239522, -28.81369450335657, -29.28563463005326, -29.75809222677809, -30.23062231721945, -30.70275406402214, -31.17399011825543, -31.64380565002369, -32.11164772919935, -32.57693444960832, -33.03905423967491, -33.49736514740377, -33.95119411928236, -34.39983640187996, -34.84255489103111, -35.27857960715988, -35.70710715984637, -36.12730035823935, -36.53828778213921, -36.93916355492982, -37.32898709059627, -37.70678304458454, -38.07154126640206, -38.42221697088748, -38.75773097619272, -39.07697009229275, -39.37878768414785, -39.66200440011387, -39.92540904505854, -40.16775970941463, -40.38778504622383, -40.58418582563399, -40.75563668006029, -40.900788150601, -41.01826899697679, -41.10668875490556, -41.1646406641995, -41.19070487911787, -41.18345202505988, -41.14144713670012, -41.06325392344633, -40.94743949343716, -40.7925794037233, -40.59726321721067, -40.36010041185109, -40.07972681513458, -39.75481144290193, -39.38406382705018, -38.96624182502539, -38.50015989749602, -37.98469789440114, -37.41881027778819, -36.80153586431247, -36.13200803409686, -35.40946532622984, -34.63326258909152, 
-33.80288240411659, -32.91794699879473, -31.97823042432425, -30.98367104885712, -29.9343843155392, -28.8306756127967, -27.67305328387152, -26.46224166125956, -25.19919396895735, -23.8851050743149, -22.52142391713885, -21.10986546660781, -19.65242214255556, -18.15137439500121, -16.60930040411635, -15.02908464652249, -13.41392505466011, -11.76733869279699, -10.09316553249874, -8.39557021328013, -6.679041417319532, -4.948388592309572, -3.208735750830352, -1.465511964727899, 0.2755617413082874, 2.008489407604159, 3.72702156695075, 5.424681194220266, 7.094794818022937, 8.730528931336064, 10.32493218193808, 11.87098367103894, 13.36164764223668, 14.78993495009778, 16.1489715523427, 17.43207424806588, 18.63283397481204, 19.74520670565101, 20.76361213045791, 21.68294180710163, 22.49808153000331, 23.20396705585947, 23.79572628056403, 24.26873425777971, 24.61867375937917, 24.84160161727615, 24.93402095264432, 24.89295919702957, 24.71605189411066, 24.40163192516016, 23.94882375679517, 23.35764222587014, 22.62909494387757, 21.76528760268745, 20.76953099586894, 19.64644857078506, 18.40208331401963, 17.04400242269752, 15.58139847526126, 14.02518556365311, 12.38808895587106, 10.68472688720919, 8.931683033934098, 7.147568344250463, 5.353070777470093, 3.570991602346783, 1.826266777457321, 0.1459718094490423, -1.440691569796566, -2.902428809833593, -4.205908136309549, -5.315840589671832, -6.195087126961686, -6.804795385920377, -7.104569840773388, -7.052671954175979, -6.606316348987533, -5.721457005513444, -4.357986595374313, -2.432840754973053, -0.2797404405288995, 0.02974975583803616, -0.0007651236329029331, 0.002430627609119369, 0.002284988594349788, 0.002471096175564709, 0.002624413001933723, 0.00278070477562836, 0.002936033542164989, 0.003090384245235107, 0.003243342713985221 + ], + "ultrasoft_cutoff_radius": 2.2, + "angular_momentum": 1, + "label": "2P" + }, + { + "cutoff_radius": 2.0, + "radial_function": [ + 1.9685647957676e-05, 2.0183992425409e-05, 2.069495253915447e-05, 
2.121884766529376e-05, 2.175600525499558e-05, 2.230676104888258e-05, 2.287145928687943e-05, 2.345045292337299e-05, 2.404410384781928e-05, 2.465278311093528e-05, 2.527687115661644e-05, 2.591675805972541e-05, 2.657284376990032e-05, 2.724553836153508e-05, 2.793526229008798e-05, 2.864244665487846e-05, 2.936753346853679e-05, 3.011097593327492e-05, 3.087323872415083e-05, 3.165479827950414e-05, 3.24561430987434e-05, 3.327777404767251e-05, 3.412020467154601e-05, 3.498396151604962e-05, 3.586958445640624e-05, 3.67776270348131e-05, 3.770865680642134e-05, 3.86632556940738e-05, 3.964202035202315e-05, 4.064556253885754e-05, 4.167450949986632e-05, 4.272950435908591e-05, 4.381120652126993e-05, 4.492029208403507e-05, 4.605745426044086e-05, 4.722340381226616e-05, 4.841886949425455e-05, 4.96445985096057e-05, 5.090135697699704e-05, 5.218993040942863e-05, 5.351112420518902e-05, 5.486576415125043e-05, 5.625469693940715e-05, 5.767879069547971e-05, 5.913893552191615e-05, 6.063604405412808e-05, 6.217105203091155e-05, 6.374491887930699e-05, 6.535862831426517e-05, 6.701318895349359e-05, 6.870963494786688e-05, 7.044902662779679e-05, 7.223245116596384e-05, 7.406102325682657e-05, 7.593588581333232e-05, 7.785821068126403e-05, 7.982919937167138e-05, 8.185008381184325e-05, 8.392212711528962e-05, 8.604662437121713e-05, 8.822490345398757e-05, 9.045832585306908e-05, 9.274828752399691e-05, 9.509621976087535e-05, 9.726136932740803e-05, 9.97306284350374e-05, 0.0001022598276065862, 0.0001050122333931686, 0.0001076704775575753, 0.0001103993975459861, 0.0001131932758125139, 0.000116064305959531, 0.000119002719632101, 0.0001220189230291178, 0.0001251093037020854, 0.0001282792118600093, 0.0001315278188249991, 0.0001348597633043939, 0.0001382796282246253, 0.0001417794863875923, 0.0001453721231555756, 0.0001490525529308808, 0.0001528294786351552, 0.0001567006241386318, 0.000160667549048926, 0.0001647372434705626, 0.0001689110017478458, 0.0001731885961852389, 0.0001775767424416363, 0.0001820731056376952, 
0.0001866833651135196, 0.0001914134422012469, 0.0001962614625727229, 0.0002012298958957867, 0.0002063258609009768, 0.0002115521173807523, 0.0002169094059106196, 0.0002224027421418721, 0.0002280384318298399, 0.0002338080389610125, 0.0002397334380599804, 0.0002458032241177665, 0.0002520264603207307, 0.0002584125082472211, 0.0002649532205957004, 0.0002716642912791384, 0.0002785434589047861, 0.0002855949051960456, 0.0002928305342558266, 0.0003002470032556484, 0.0003078458217681212, 0.000315642103653051, 0.0003236351441381477, 0.0003318312522646621, 0.0003402331450604859, 0.0003488489919081375, 0.0003576813201556712, 0.0003667372480361305, 0.0003760277083735683, 0.000385544181872297, 0.0003953086765878537, 0.0004053156097933412, 0.0004155807293265999, 0.0004261028995065656, 0.0004368900062268735, 0.0004479539151235368, 0.000459296319536381, 0.0004709242133593372, 0.0004828500880395531, 0.0004950766351834832, 0.0005076076218178459, 0.0005204637787258109, 0.0005336353209278251, 0.0005471540742593867, 0.0005610049892567477, 0.0005752092381473348, 0.0005897721330929258, 0.0006047031698538881, 0.0006200157860406564, 0.0006357118506673858, 0.0006518067922971411, 0.0006683115848583219, 0.0006852316141198974, 0.0007025809558116301, 0.0007203664919886135, 0.0007386069533263009, 0.0007573036540592074, 0.0007764800905331343, 0.0007961390678599379, 0.0008162960731386019, 0.0008369587514102542, 0.0008581534273478873, 0.0008798766422742912, 0.0009021539737003284, 0.0009249935562512899, 0.0009484108027820268, 0.0009724234573317151, 0.0009970416085263662, 0.001022285826184844, 0.001048166041995488, 0.001074702057918566, 0.001101911788959656, 0.001129806155611463, 0.001158414300032343, 0.001187736229050167, 0.001217808411571574, 0.001248638437950016, 0.001280248697340917, 0.001312663221265479, 0.001345892642352139, 0.001379966741400895, 0.001414903866663251, 0.001450722177706671, 0.001487452215069296, 0.001525105612184874, 0.001563718165673567, 0.001603303073874855, 
0.001643893016581476, 0.001685510947150484, 0.001728178334455707, 0.001771935423679506, 0.001816787021424695, 0.001862782753079333, 0.001909943757051386, 0.001958289957033964, 0.002007870549200329, 0.002058700191802825, 0.002110816355625951, 0.002164256550912338, 0.002219041296389785, 0.002275222056624801, 0.0023328175627198, 0.002391873258635217, 0.002452427471661281, 0.002514509586507166, 0.002578167753850356, 0.00264343355872479, 0.00271035220035582, 0.002778968115780039, 0.002849315649075815, 0.002921448168352934, 0.002995402823851094, 0.003071231567892899, 0.003148985035965488, 0.003228694561947704, 0.00331043356857969, 0.003394238606101292, 0.003480161941226296, 0.00356826630439539, 0.003658593662122564, 0.003751210090323373, 0.003846171352302858, 0.003943537752695386, 0.004043368522840921, 0.004145720113547201, 0.004250673884011318, 0.004358274318324635, 0.004468603361494738, 0.004581729536651228, 0.00469770877165052, 0.004816631457292704, 0.004938560583178196, 0.005063575628319315, 0.005191761187766359, 0.005323185029420335, 0.005457939774617684, 0.005596101167489955, 0.005737762268254515, 0.005883014992160037, 0.006031933699898255, 0.006184628828695183, 0.006341183094782859, 0.00650170570666642, 0.006666292738128061, 0.006835039125504904, 0.007008061890672674, 0.007185463736757752, 0.007367351996391979, 0.007553850617648203, 0.007745062617065979, 0.007941121901812896, 0.008142139506205135, 0.008348244800832379, 0.008559569351773802, 0.008776239748593387, 0.008998398578651257, 0.009226179738352514, 0.009459720396911717, 0.009699178614232454, 0.009944692828881814, 0.01019642718919783, 0.01045452718826635, 0.01071916365453532, 0.01099049700358312, 0.0112686977401806, 0.011553938719508, 0.01184640031566325, 0.01214626446430298, 0.01245371770970838, 0.01276894987652009, 0.01309216393212424, 0.01342355820967666, 0.01376333969185249, 0.01411171877003362, 0.01446891292730004, 0.01483515293731274, 0.01521065878719301, 0.01559566724889082, 0.01599042101526297, 
0.01639516404685898, 0.01681015141257863, 0.01723564085912923, 0.01767190009963414, 0.01811919769714662, 0.01857781322201834, 0.01904803764724573, 0.019530163621426, 0.02002448548658649, 0.02053132293233371, 0.02105097660167825, 0.02158378910457474, 0.02213008253905696, 0.02269019701091152, 0.02326449021015148, 0.02385331040868437, 0.02445703672709068, 0.02507603559425322, 0.02571069773705584, 0.02636142108280409, 0.02702860763567687, 0.02771268012672056, 0.02841405521467535, 0.0291331805315803, 0.02987050186606721, 0.03062647528410933, 0.03140157758135324, 0.03219629217100873, 0.03301111398346462, 0.033846550831472, 0.03470312104742748, 0.03558136428395399, 0.03648182941083787, 0.03740507224240197, 0.03835166971122598, 0.03932221510281037, 0.04031731498133818, 0.04133758866687147, 0.0423836686319897, 0.04345621125142078, 0.04455588673899946, 0.04568337742378936, 0.04683938977450118, 0.04802463403266613, 0.04923990740024607, 0.05048583416739516, 0.05176321928181804, 0.05307319290260039, 0.05441593578773816, 0.05579299375142308, 0.05720454110314104, 0.05865203269865993, 0.060135956803713, 0.06165762998606342, 0.06321750664154922, 0.06481700487600349, 0.06645699914928983, 0.06813812583090176, 0.06986224966974769, 0.07162955685142482, 0.07344189755142091, 0.07529970228062398, 0.07720471442910515, 0.07915774200431104, 0.08116027913553742, 0.08321337542418583, 0.08531836073141885, 0.08747641899966957, 0.08968941146255352, 0.09195789385027534, 0.09428409641117272, 0.09666864887927587, 0.09911394090822487, 0.1016206428933817, 0.1041909205973796, 0.1068261391015575, 0.1095276978579368, 0.1122977388150999, 0.1151378597907328, 0.1180495163704022, 0.1210346876191462, 0.1240954945058141, 0.1272334433820454, 0.130450794843745, 0.1337491664812146, 0.1371311551100107, 0.1405982798353678, 0.1441531380482701, 0.1477977731206558, 0.1515340085380478, 0.1553652865253731, 0.1592926741512652, 0.1633194979313469, 0.1674479650454225, 0.1716803939385072, 0.1760201094479351, 
0.1804689386188659, 0.1850299589618447, 0.189706676867238, 0.1945005402768325, 0.1994160125162032, 0.2044552924739442, 0.2096213048828962, 0.2149181441562939, 0.220348570943546, 0.2259154096214718, 0.2316227981667671, 0.2374744981817958, 0.2434734091020527, 0.2496234018010499, 0.255928461927088, 0.2623924434314142, 0.2690195908640304, 0.2758130690582696, 0.2827784070054108, 0.2899184638359858, 0.2972390600439053, 0.3047435547419474, 0.3124368833827255, 0.3203245072288096, 0.3284097102882086, 0.3366996225831201, 0.3451967156853284, 0.353909144770746, 0.3628394126376839, 0.3719950942459996, 0.381380654103059, 0.3910023981367062, 0.4008657472613876, 0.4109773971974975, 0.421342973061632, 0.4319688925413653, 0.4428618639066635, 0.4540283120558292, 0.4654751852608829, 0.4772097077734012, 0.4892386834444754, 0.5015686761779736, 0.5142093823461177, 0.5271662985467613, 0.5404483028701675, 0.5540634558542998, 0.5680195579742764, 0.5823258924951683, 0.596990178895872, 0.612022005400111, 0.6274300661656761, 0.6432238518636224, 0.6594127482908252, 0.6760070397069271, 0.693015882132811, 0.7104505591539116, 0.7283201401462358, 0.7466370637114788, 0.7654108408865632, 0.7846539079189194, 0.8043768471974644, 0.824592402545522, 0.8453114916301623, 0.8665477423478412, 0.8883126666817738, 0.9106206707776594, 0.9334833482138121, 0.9569154495929786, 0.9809308173530427, 1.005542819434507, 1.030767212854651, 1.056618060992367, 1.083110958159896, 1.110261475196666, 1.13808571341309, 1.166599741652721, 1.195820506230622, 1.225765080487751, 1.256450828742346, 1.28789622967391, 1.320118782660366, 1.353137743121966, 1.386972170128934, 1.421641667438864, 1.457166301045776, 1.493566565410349, 1.530863332428654, 1.569078208142658, 1.608232899011003, 1.648350009604847, 1.689452140293945, 1.731563117200295, 1.774706469867444, 1.81890673505055, 1.864189009650858, 1.910578580788981, 1.958101867009429, 2.006784852027237, 2.056655575614862, 2.10774105045147, 2.160070087172369, 2.213671332152248, 
2.268574681246218, 2.324809974705964, 2.382408190385637, 2.441400678534801, 2.501819597730438, 2.563697537781616, 2.627068034836586, 2.69196493619221, 2.758422991265543, 2.826477836825719, 2.896165248877803, 2.967522051050859, 3.040586037840879, 3.115394940527093, 3.191988007986548, 3.270404706197719, 3.350685479437483, 3.432871339079233, 3.517004117434365, 3.603126464450362, 3.69128159267739, 3.781513529535546, 3.873867165937196, 3.968387989223802, 4.065122273656497, 4.164117051397645, 4.265420197399175, 4.369080045020151, 4.475146058940878, 4.583668082976476, 4.694696844157714, 4.808283904321413, 4.924481244457928, 5.043341770147665, 5.164919060103167, 5.289267159070025, 5.416441160292255, 5.546496284434152, 5.679488868669047, 5.815475476799467, 5.954513442485213, 6.096660610149124, 6.241975268182355, 6.390516276992194, 6.542342959888222, 6.697514902618252, 6.85609228001956, 7.018135386776329, 7.183704995514239, 7.352861919419345, 7.525667321513728, 7.702182325550666, 7.882468227384281, 8.06658623104817, 8.254597520371922, 8.44656305474572, 8.64254356888407, 8.842599458116796, 9.046790744516706, 9.255176812785322, 9.467816530744939, 9.684767934002926, 9.906088255371555, 10.13183373964786, 10.36205941305347, 10.59681919965583, 10.83616546585298, 11.08014912130282, 11.32881926788486, 11.58222315354638, 11.84040590137722, 12.10341035907488, 12.3712769195061, 12.64404323789639, 12.92174402519269, 13.20441093681138, 13.49207208677447, 13.78475204017997, 14.08247131417247, 14.38524633851122, 14.69308888811203, 15.00600601121946, 15.32399955519803, 15.64706591146701, 15.97519557529576, 16.30837288677766, 16.64657556870153, 16.98977435944272, 17.33793255770627, 17.69100570603784, 18.04894096413416, 18.41167683480046, 18.77914251729116, 19.15125758114533, 19.52793124730343, 19.90906207355453, 20.29453723100054, 20.6842320454255, 21.07800938437716, 21.47571902771713, 21.87719710930017, 22.28226543244303, 22.69073088475299, 23.10238468897007, 23.51700184142124, 
23.93434030745345, 24.35414040530326, 24.77612405120465, 25.19999406481987, 25.62543338925347, 26.05210441387925, 26.47964821490163, 26.90768373384234, 27.3358071860746, 27.76359113203735, 28.19058390154424, 28.61630874590541, 29.04026316918994, 29.46191822204886, 29.88071777906629, 30.29607792326217, 30.70738626479791, 31.11400137152693, 31.51525216918872, 31.91043747398844, 32.29882545716242, 32.67965330198987, 33.05212680123323, 33.41542014199128, 33.76867566146848, 34.11100379637659, 34.44148306909926, 34.75916019261365, 35.06305031774948, 35.35213741443286, 35.62537475451477, 35.88168562095391, 36.1199641179821, 36.33907622415489, 36.53786098944347, 36.71513199032909, 36.86967900460944, 37.00026988590626, 37.10565276105864, 37.1845584632297, 37.23570326027649, 37.25779191856718, 37.24952103958917, 37.20958281228811, 37.13666904026914, 37.02947562754036, 36.88670737331822, 36.70708324731044, 36.48934202871086, 36.2322483977548, 35.93459947023172, 35.59523177956619, 35.21302873500622, 34.78692850370087, 34.31593240049732, 33.7991137420194, 33.23562709055539, 32.62471807392463, 31.96573350020302, 31.25813200587953, 30.50149501435325, 29.69553808099501, 28.84012258410354, 27.93526761953013, 26.98116215800689, 25.97817735406116, 24.92687887692943, 23.82803926519282, 22.68265014952812, 21.49193422077332, 20.25735689669296, 18.98063740987256, 17.66375929307608, 16.30898003853561, 14.91883967382253, 13.49616819995033, 12.04409149602499, 10.56603559191746, 9.06572895202904, 7.547202526078589, 6.014787290440832, 4.473108921481664, 2.927079295253247, 1.381884505689744, -0.157031107303137, -1.683985043719982, -3.193081510333393, -4.678241228546646, -6.13323670570356, -7.551733342493371, -8.92733671522312, -10.25364648686836, -11.52431727005889, -12.73312674746344, -13.87405146474489, -14.94135044335835, -15.92965692047071, -16.83398755685865, -17.64930235107137, -18.37055399886088, -18.99281803967118, -19.51134337329927, -19.9216085116383, -20.21938388928693, 
-20.4008004357442, -20.46242441252194, -20.40133862548299, -20.21522977094334, -19.90248164877257, -19.46227389550376, -18.89468544292667, -18.20080213488041, -17.38282743028597, -16.4441951283894, -15.38968302755626, -14.22552603928456, -12.9595275479224, -11.6011675045106, -10.16170583651384, -8.65427976389223, -7.093993520629873, -5.497999104370734, -3.885566502403437, -2.278141935778793, -0.6993925136581307, 0.8247644769117262, 2.266149469807738, 3.594327825722308, 4.776638677722755, 5.778247926388125, 6.562228313527325, 7.08966948955424, 7.31982220010095, 7.210273566195856, 6.717220402746342, 5.795230750273895, 4.402459756005975, 2.45339987312685, 0.2819065461031016, -0.02984274247931374, 0.000854828891906883, -0.002400411135740814, -0.00229028972853778, -0.00251427512722701, -0.002705160202194438, -0.00289891316814038, -0.003091510797181505, -0.003282878377651808, -0.00347253681395775 + ], + "ultrasoft_cutoff_radius": 2.2, + "angular_momentum": 1, + "label": "2P" + } + ], + "D_ion": [ + -0.96087750697826, -0.3144846843225896, 0.0, 0.0, -0.3144846843225896, -0.2342641749844402, 0.0, 0.0, 0.0, 0.0, 0.010197340010515865, 0.024501575295561184, 0.0, 0.0, 0.024501575295561184, 0.0558903550752182 + ], + "local_potential": [ + -6.280912050418135, -6.280912030591415, -6.28091201026278, -6.28091198941953, -6.28091196804863, -6.280911946136725, -6.280911923670115, -6.280911900634765, -6.280911877016275, -6.28091185279988, -6.280911827970445, -6.28091180251245, -6.280911776409985, -6.280911749646735, -6.28091172220597, -6.280911694070535, -6.28091166522285, -6.280911635644885, -6.280911605318155, -6.28091157422369, -6.28091154234207, -6.280911509653365, -6.28091147613714, -6.28091144177245, -6.280911406537815, -6.28091137041121, -6.280911333370055, -6.280911295391195, -6.280911256450905, -6.280911216524835, -6.28091117558803, -6.280911133614905, -6.280911090579225, -6.2809110464541, -6.280911001211935, -6.280910954824465, -6.280910907262685, -6.28091085849688, 
-6.280910808496565, -6.28091075723048, -6.280910704666595, -6.280910650772045, -6.28091059551315, -6.280910538855375, -6.2809104807633, -6.28091042120062, -6.280910360130105, -6.28091029751358, -6.28091023331192, -6.280910167484985, -6.280910099991635, -6.28091003078968, -6.280909959835875, -6.28090988708587, -6.28090981249419, -6.28090973601422, -6.280909657598145, -6.280909577196965, -6.28090949476042, -6.28090941023699, -6.28090932357384, -6.280909234716805, -6.280909143610345, -6.280909050197525, -6.280908954419945, -6.280908856217755, -6.28090875552956, -6.28090865229244, -6.28090854644187, -6.28090843791168, -6.28090832663404, -6.2809082125394, -6.280908095556445, -6.28090797561206, -6.280907852631275, -6.28090772653722, -6.28090759725109, -6.28090746469207, -6.28090732877732, -6.28090718942187, -6.280907046538635, -6.2809069000383, -6.2809067498293, -6.280906595817755, -6.280906437907395, -6.28090627599953, -6.280906109992965, -6.28090593978393, -6.28090576526605, -6.28090558633024, -6.28090540286466, -6.28090521475465, -6.280905021882625, -6.28090482412804, -6.28090462136729, -6.28090441347364, -6.280904200317165, -6.28090398176463, -6.280903757679435, -6.280903527921515, -6.28090329234727, -6.28090305080946, -6.28090280315712, -6.280902549235465, -6.280902288885775, -6.280902021945335, -6.2809017482473, -6.2809014676206, -6.28090117988984, -6.280900884875175, -6.280900582392225, -6.28090027225192, -6.280899954260425, -6.28089962821898, -6.28089929392381, -6.28089895116597, -6.280898599731225, -6.280898239399925, -6.28089786994686, -6.28089749114111, -6.28089710274591, -6.28089670451851, -6.28089629621001, -6.28089587756521, -6.280895448322445, -6.280895008213435, -6.280894556963105, -6.280894094289415, -6.280893619903185, -6.280893133507915, -6.280892634799595, -6.280892123466535, -6.28089159918914, -6.28089106163973, -6.28089051048233, -6.28088994537245, -6.280889365956895, -6.280888771873525, -6.28088816275103, -6.280887538208695, -6.28088689785618, 
-6.280886241293255, -6.280885568109565, -6.28088487788436, -6.280884170186245, -6.2808834445729, -6.280882700590815, -6.280881937774995, -6.28088115564867, -6.28088035372301, -6.28087953149681, -6.280878688456165, -6.280877824074185, -6.28087693781062, -6.28087602911155, -6.280875097409045, -6.280874142120785, -6.280873162649715, -6.28087215838367, -6.280871128694975, -6.28087007294009, -6.280868990459165, -6.28086788057565, -6.280866742595885, -6.28086557580863, -6.280864379484655, -6.28086315287626, -6.28086189521684, -6.28086060572036, -6.280859283580895, -6.28085792797214, -6.280856538046845, -6.280855112936325, -6.28085365174992, -6.2808521535744, -6.28085061747345, -6.28084904248702, -6.28084742763078, -6.28084577189549, -6.280844074246345, -6.28084233362236, -6.280840548935695, -6.28083871907096, -6.280836842884555, -6.280834919203915, -6.280832946826805, -6.28083092452056, -6.280828851021305, -6.280826725033185, -6.28082454522754, -6.280822310242085, -6.280820018680045, -6.280817669109305, -6.280815260061485, -6.280812790031055, -6.280810257474375, -6.280807660808725, -6.280804998411335, -6.280802268618365, -6.280799469723845, -6.28079659997866, -6.28079365758939, -6.28079064071725, -6.2807875474769, -6.280784375935285, -6.280781124110435, -6.28077778997021, -6.280774371431045, -6.280770866356635, -6.280767272556605, -6.28076358778516, -6.28075980973966, -6.280755936059175, -6.280751964323055, -6.280747892049355, -6.28074371669334, -6.28073943564587, -6.28073504623177, -6.28073054570817, -6.280725931262775, -6.28072120001214, -6.280716348999835, -6.28071137519461, -6.280706275488525, -6.28070104669497, -6.28069568554671, -6.280690188693835, -6.280684552701645, -6.28067877404855, -6.280672849123825, -6.280666774225405, -6.28066054555752, -6.280654159228365, -6.28064761124767, -6.28064089752418, -6.28063401386313, -6.28062695596362, -6.280619719415915, -6.28061229969872, -6.280604692176335, -6.28059689209577, -6.28058889458379, -6.28058069464386, 
-6.280572287153025, -6.28056366685873, -6.280554828375535, -6.280545766181745, -6.28053647461598, -6.28052694787364, -6.28051718000328, -6.2805071649029, -6.28049689631613, -6.28048636782835, -6.28047557286268, -6.28046450467586, -6.28045315635407, -6.280441520808605, -6.280429590771465, -6.280417358790825, -6.280404817226355, -6.280391958244525, -6.280378773813645, -6.28036525569892, -6.280351395457285, -6.28033718443216, -6.280322613748035, -6.28030767430496, -6.28029235677287, -6.280276651585765, -6.280260548935765, -6.280244038766975, -6.280227110769255, -6.28020975437177, -6.280191958736435, -6.28017371275113, -6.28015500502282, -6.28013582387043, -6.2801161573176, -6.280095993085215, -6.28007531858376, -6.28005412090551, -6.28003238681645, -6.280010102748105, -6.279987254789045, -6.279963828676255, -6.27993980978628, -6.27991518312609, -6.27988993332379, -6.279864044619075, -6.279837500853375, -6.279810285459885, -6.279782381453225, -6.279753771418885, -6.279724437502435, -6.27969436139841, -6.279663524338945, -6.279631907082135, -6.279599489900075, -6.27956625256662, -6.279532174344825, -6.279497233974085, -6.279461409656945, -6.27942467904557, -6.27938701922791, -6.27934840671345, -6.279308817418695, -6.279268226652205, -6.27922660909933, -6.27918393880649, -6.27914018916513, -6.279095332895235, -6.279049342028415, -6.279002187890645, -6.278953841084475, -6.27890427147087, -6.27885344815056, -6.27880133944494, -6.27874791287648, -6.278693135148665, -6.278636972125425, -6.278579388810045, -6.278520349323555, -6.27845981688261, -6.27839775377675, -6.278334121345195, -6.27826887995296, -6.278201988966435, -6.27813340672837, -6.2780630905322, -6.277990996595745, -6.27791708003426, -6.277841294832845, -6.277763593818125, -6.27768392862925, -6.27760224968819, -6.27751850616927, -6.27743264596798, -6.277344615668995, -6.27725436051341, -6.277161824365165, -6.277066949676665, -6.2769696774535, -6.276869947218375, -6.27676769697407, -6.27666286316556, 
-6.276555380641155, -6.27644518261271, -6.27633220061486, -6.276216364463255, -6.27609760221176, -6.27597584010862, -6.275851002551565, -6.275723012041805, -6.27559178913689, -6.275457252402455, -6.27531931836276, -6.27517790145006, -6.275032913952705, -6.274884265962025, -6.274731865317885, -6.274575617552965, -6.27441542583566, -6.274251190911615, -6.27408281104386, -6.273910181951475, -6.273733196746825, -6.27355174587123, -6.273365717029155, -6.273174995120775, -6.272979462172955, -6.272778997268555, -6.272573476474105, -6.2723627727657, -6.272146755953145, -6.271925292602355, -6.27169824595583, -6.27146547585134, -6.27122683863862, -6.27098218709416, -6.270731370333955, -6.270474233724215, -6.27021061879001, -6.26994036312177, -6.26966330027958, -6.269379259695295, -6.26908806657238, -6.26878954178337, -6.268483501765075, -6.26816975841127, -6.26784811896298, -6.267518385896245, -6.26718035680732, -6.26683382429525, -6.26647857584181, -6.266114393688695, -6.26574105471199, -6.26535833029376, -6.26496598619081, -6.264563782400505, -6.26415147302361, -6.263728806124075, -6.26329552358574, -6.262851360965915, -6.2623960473457, -6.26192930517711, -6.26145085012682, -6.260960390916595, -6.26045762916025, -6.25994225919714, -6.2594139679221, -6.25887243461182, -6.258317330747485, -6.257748319833815, -6.25716505721423, -6.256567189882275, -6.255954356289125, -6.255326186147165, -6.254682300229605, -6.25402231016604, -6.25334581823394, -6.252652417145985, -6.251941689833215, -6.25121320922398, -6.25046653801856, -6.249701228459505, -6.24891682209757, -6.248112849553285, -6.24728883027405, -6.246444272286775, -6.24557867194596, -6.244691513677325, -6.24378226971676, -6.24285039984477, -6.24189535111628, -6.240916557585795, -6.23991344002796, -6.23888540565343, -6.23783184782017, -6.23675214574, -6.235645664180635, -6.23451175316301, -6.233349747654035, -6.23215896725482, -6.230938715884325, -6.229688281458535, -6.22840693556518, -6.22709393313412, -6.22574851210336, 
-6.224369893080905, -6.222957279002375, -6.221509854784695, -6.220026786975735, -6.218507223400205, -6.216950292801885, -6.21535510448226, -6.21372074793585, -6.21204629248232, -6.21033078689559, -6.208573259030175, -6.20677271544492, -6.20492814102445, -6.203038498598515, -6.20110272855961, -6.199119748479025, -6.19708845272181, -6.19500771206087, -6.19287637329064, -6.19069325884065, -6.18845716638949, -6.18616686847954, -6.183821112132935, -6.181418618469325, -6.1789580823259, -6.17643817188021, -6.17385752827646, -6.17121476525579, -6.168508468791255, -6.16573719672819, -6.162899478430625, -6.159993814434595, -6.157018676109035, -6.15397250532518, -6.15085371413532, -6.14766068446174, -6.144391767796955, -6.14104528491606, -6.137619525602395, -6.13411274838747, -6.130523180306355, -6.126849016669685, -6.123088420853505, -6.119239524108155, -6.11530042538761, -6.111269191200525, -6.107143855484425, -6.1029224195045, -6.09860285177843, -6.094183088028855, -6.08966103116499, -6.085034551295035, -6.080301485771025, -6.07545963926782, -6.070506783897965, -6.06544065936419, -6.060258973151275, -6.054959400759255, -6.04953958597961, -6.04399714121648, -6.038329647854675, -6.032534656676375, -6.026609688328475, -6.02055223384236, -6.014359755208055, -6.00802968600457, -6.00155943208825, -5.994946372340915, -5.98818785947955, -5.98128122092916, -5.97422375976041, -5.96701275569357, -5.95964546617011, -5.952119127493275, -5.944430956038765, -5.936578149536485, -5.928557888424195, -5.920367337273675, -5.912003646289785, -5.903463952882555, -5.89474538331226, -5.88584505440693, -5.876760075351735, -5.86748754954898, -5.8580245765474, -5.848368254038715, -5.83851567991923, -5.828463954413625, -5.81821018225757, -5.807751474935295, -5.797084952967655, -5.78620774824552, -5.775117006402775, -5.76380988922238, -5.75228357706827, -5.74053527133514, -5.728562196907145, -5.71636160461596, -5.703930773687525, -5.69126701416605, -5.67836766930275, -5.665230117896015, 
-5.651851776568525, -5.63823010196599, -5.62436259286111, -5.61024679214545, -5.59588028869079, -5.581260719060785, -5.566385769052705, -5.551253175048275, -5.535860725151865, -5.52020626009359, -5.504287673874395, -5.488102914129805, -5.47164998218884, -5.4549269328046, -5.437931873533275, -5.42066296373901, -5.40311841320287, -5.385296480315555, -5.36719546983521, -5.34881373019405, -5.330149650340305, -5.311201656105635, -5.29196820609229, -5.272447787079545, -5.252638908954765, -5.232540099181555, -5.212149896825545, -5.191466846167585, -5.17048948994469, -5.14921636227114, -5.12764598130536, -5.105776841743395, -5.083607407236185, -5.061136102846085, -5.038361307678035, -5.01528134784208, -4.991894489927043, -4.968198935189431, -4.944192814687164, -4.919874185613946, -4.895241029116777, -4.870291249905362, -4.8450226779876004, -4.819433072888664, -4.793520130731479, -4.767281494572277, -4.740714768394584, -4.713817535166928, -4.6865873793614075, -4.659021914309959, -4.631118814740252, -4.602875854781061, -4.574290951655472, -4.5453622151868736, -4.516088003125725, -4.486466982163131, -4.456498194329878, -4.426181128287617, -4.395515794804243, -4.364502805472411, -4.33314345348408, -4.301439795023482, -4.2693947295960415, -4.237012077384405, -4.204296651529727, -4.171254323093119, -4.13789207637665, -4.104218052293862, -4.070241577594581, -4.035973177985367, -4.00142457356067, -3.966608655484549, -3.9315394435497617, -3.8962025118577244, -3.860636254274922, -3.824866280404036, -3.7889103728693745, -3.7527870032398356, -3.716515228894207, -3.6801146026614493, -3.6436051071744293, -3.607007128446101, -3.5703402201275596, -3.533619324644883, -3.496858501443439, -3.46007216841235, -3.4232750871980073, -3.3864823459459785, -3.3497093392918647, -3.3129717454187917, -3.276285499997079, -3.239666766820971, -3.203131904958055, -3.166697432229297, -3.13037998484218, -3.094196273005825, -3.058163032366074, -3.0222969711105536, -2.986614712608522, -2.951132733469156, 
-2.9158672969242323, -2.8808343814679604, -2.8460496047184036, -2.811528142501369, -2.777284643200092, -2.7433331374621717, -2.70968694340974, -2.676358567560354, -2.643359601733948, -2.6107006162968496, -2.57839105017594, -2.5464390981653433, -2.514851596143774, -2.483633904921798, -2.452789793544315, -2.4223213229823655, -2.39222873125827, -2.362510321156647, -2.333162351777575, -2.304179345336058, -2.275556371670399, -2.2472889583779456, -2.219372688612388, -2.1918032003935566, -2.164576185925833, -2.1376873909250556, -2.1111326139537256, -2.0849077057645853, -2.059008568652272, -2.0334311558131084, -2.008171470712862, -1.983225566462336, -1.958589545200813, -1.9342595574871155, -1.9102318016982935, -1.886502523435802, -1.8630680149390375, -1.8399246145062196, -1.8170687059224155, -1.7944967178947084, -1.7722051234943645, -1.7501904396058785, -1.728449226382891, -1.706978086710781, -1.685773665675942, -1.6648326500416055, -1.6441517677301165, -1.623727787311652, -1.6035575174992145, -1.5836378066499055, -1.563965542272368, -1.5445376505403114, -1.5253510958121, -1.506402880156276, -1.487690042882993, -1.469209660081274, -1.4509588441620025, -1.43293474340663, -1.4151345415214684, -1.3975554571975475, -1.3801947436759565, -1.3630496883185694, -1.3461176121841496, -1.329395869609691, -1.312881847796998, -1.2965729664044066, -1.2804666771435584, -1.264560463381228, -1.248851839746064, -1.2333383517402465, -1.218017575355966, -1.202887116696655, -1.187944611602949, -1.173187725283276, -1.1586141519490476, -1.144221614454385, -1.1300078639403075, -1.1159706794833575, -1.1021078677485765, -1.0884172626468, -1.074896724996218, -1.061544142188116, -1.048357427856794, -1.0353345215535654, -1.0224733884248216, -1.0097720188940884, -0.9972284283480265, -0.98484065682635, -0.9726067687155755, -0.96052485244659, -0.948593020195978, -0.9368094075910425, -0.9251721734185155, -0.9136794993368595, -0.9023295895921645, -0.8911206707375725, -0.88005099135617, -0.8691188217873504, 
-0.858322453856548, -0.847660200608353, -0.837130396042932, -0.826731394855717, -0.8164615721803455, -0.806319323334774, -0.7963030635705605, -0.7864112278252615, -0.776642270477889, -0.766994665107429, -0.757466904254339, -0.7480574991850205, -0.7387649796592165, -0.729587893700288, -0.7205248073683616, -0.7115743045362765, -0.7027349866683245, -0.694005472601744, -0.6853843983309075, -0.6768704167942144, -0.6684621976636075, -0.660158427136726, -0.651957807731615, -0.6438590580840035, -0.6358609127470966, -0.6279621219938405, -0.620161451621663, -0.612457682759617, -0.6048496116779355, -0.597336049599945, -0.589915822516309, -0.582587771001587, -0.5753507500330615, -0.568203628811836, -0.561145290586092, -0.554174632476626, -0.547290565304503, -0.5404920134208495, -0.533777914538772, -0.5271472195673595, -0.5205988924477415, -0.514131909991189, -0.50774526171922, -0.501437949705692, -0.4952089884208601, -0.4890574045773638, -0.4829822369781353, -0.4769825363661873, -0.4710573652762751, -0.4652057978884011, -0.4594269198831314, -0.4537198282987228, -0.4480836313900126, -0.44251744848907215, -0.43702040986758983, -0.43159165660095994, -0.4262303404340715, -0.4209356236487538, -0.41570667893288077, -0.4105426892511001, -0.4054428477171651, -0.40040635746786524, -0.3954324315385122, -0.3905202927399874, -0.3856691735373115, -0.3808783159297243, -0.3761469713322639, -0.3714744004588099, -0.3668598732065878, -0.36230266854211063, -0.35780207438853334, -0.35335738751441687, -0.34896791342386746, -0.3446329662480476, -0.340351868638037, -0.336123951659018, -0.33194855468578643, -0.3278250252995532, -0.323752719186031, -0.31973100003479116, -0.31575923943986167, -0.31183681680157005, -0.30796311922959557, -0.30413754144723143, -0.3003594856968344, -0.29662836164644213, -0.2929435862975583, -0.28930458389407165, -0.2857107858323119, -0.2821616305722193, -0.27865656354961066, -0.27519503708954135, -0.2717765103207323, -0.268400449091064, -0.26506632588411555, 
-0.2617736197367353, -0.2585218161576387, -0.25531040704700786, -0.25213889061709194, -0.2490067713137922, -0.2459135597392133, -0.24285877257518124, -0.23984193250770344, -0.23686256815236845, -0.233920213980672, -0.2310144102472505, -0.2281447029180255, -0.2253106435992316, -0.22251178946732975, -0.2197477031997907, -0.21701795290673276, -0.2143221120634156, -0.2116597594435654, -0.2090304790535325, -0.2064338600672677, -0.203869496762102, -0.2013369884553306, -0.19883593944158026, -0.19636595893095796, -0.1939266609879701, -0.19151766447119645, -0.18913859297372074, -0.18678907476429654, -0.1844687427292483, -0.1821772343150951, -0.17991419147188434, -0.1776792605972355, -0.1754720924810735, -0.17329234225105306, -0.1711396693186599, -0.1690137373259784, -0.1669142140931238, -0.16484077156632085, -0.16279308576669174, -0.1607708367388945, -0.158773708506096, -0.1568013890022884, -0.1548535844778167, -0.15292996130850575, -0.15103024058751366, -0.14915408034913116, -0.14730125860587084, -0.1454714529839571, -0.14366437757253786, -0.14187975001240366, -0.14011729145186655, -0.138376726503191, -0.136657783199562, -0.1349601929525908, -0.13328369051034786, -0.1316280139159151, -0.1299929044664565, -0.12837810667279365, -0.12678336821948574, -0.1252084399254052, -0.12365307570480145, -0.12211703252885105, -0.1206000703876824, -0.11910195225287434, -0.1176224440404209, -0.1161613145741533, -0.1147183355496202, -0.113293281498413, -0.1118859297529367, -0.1104960604116182, -0.1091234563045448, -0.1077679029595326, -0.10642918856861334, -0.1051071039549396, -0.1038014425401011, -0.10251200031184494, -0.10123857579219996, -0.09998097000599394, -0.09873898644976414, -0.09751243106105335, -0.0963011121880862, -0.09510484055982475, -0.0939234292563931, -0.0927566936798713, -0.09160445152545205, -0.0904665227529542, -0.08934272955869225, -0.08823289634769305, -0.0871368497062592, -0.08605441837487345, -0.08498543322143765, -0.08392972721484715, -0.08288713539889025, 
-0.0818574948664745, -0.0808406447341721, -0.07983642611708085, -0.07884468210399904, -0.07786525773290676, -0.07689799996675314, -0.0759427576695447, -0.0749993815827287, -0.0740677243018726, -0.0731476402536306, -0.07223898567299815, -0.07134161858084855, -0.07045539876174775, -0.06958018774204625, -0.0687158487682414, -0.06786224678560995, -0.06701924841710576, -0.06618672194251825, -0.06536453727789235, -0.06455256595520124, -0.06375068110227385, -0.0629587574229705, -0.06217667117760495, -0.06140430016361045, -0.06064152369644455, -0.05988822259073225, -0.05914427914164335, -0.05840957710650025, -0.05768400168661535, -0.05696743950935295, -0.05625977861041495, -0.05556090841634635, -0.05487071972725725, -0.054189104699761, -0.05351595683012265, -0.052851170937618, -0.05219464314809895, -0.05154627087776255, -0.0509059528171226, -0.0502735889151793, -0.049649080363786284, -0.049032329582212056, -0.04842324020189201, -0.04782171705137143, -0.047227666141434185, -0.04664099465041688, -0.046061610909705626, -0.04548942438941202, -0.044924345684228276, -0.044366286499456994, -0.04381515963721508, -0.04327087898280919, -0.04273335949127958, -0.04220251717411225, -0.041678269086115056, -0.04116053331245761, -0.04064922895587204, -0.040144276124012265, -0.039645595916971095, -0.03915311041495156, -0.0386667426660919, -0.038186416674441996, -0.03771205738808826, -0.03724359068742726, -0.03678094337358385, -0.0363240431569739, -0.03587281864600913, -0.03542719933594156, -0.03498711559784748, -0.03455249866774742, -0.03412328063586174, -0.0336993944359998, -0.03328077383508039, -0.03286735342278303, -0.03245906860132714, -0.03205585557537863, -0.03165765134208198, -0.031264393681215454, -0.030876021145469574, -0.03049247305084544, -0.03011368946717286 + ], + "atomic_wave_functions": [ + { + "occupation": 2.0, + "radial_function": [ + -0.000732655641967068, -0.0007418713130032765, -0.000751202902817323, -0.0007606518694843582, -0.0007702196894197191, 
-0.0007799078576096242, -0.0007897178878447532, -0.0007996513129567773, -0.0008097096850578563, -0.0008198945757831542, -0.0008302075765363975, -0.0008406502987385326, -0.0008512243740794987, -0.0008619314547731771, -0.000872773213815545, -0.0008837513452460673, -0.0008948675644123905, -0.000906123608238356, -0.0009175212354953842, -0.0009290622270772777, -0.000940748386278475, -0.0009525815390758075, -0.0009645635344137999, -0.0009766962444935571, -0.0009889815650652848, -0.00100142141572449, -0.001014017740211908, -0.001026772506717198, -0.001039687708186457, -0.001052765362633615, -0.001066007513455724, -0.001079416229752236, -0.001092993606648274, -0.001106741765621985, -0.001120662854836008, -0.001134759049473102, -0.001149032552076002, -0.001163485592891551, -0.001178120430219151, -0.00119293935076361, -0.00120794466999241, -0.001223138732497482, -0.001238523912361523, -0.00125410261352892, -0.001269877270181353, -0.001285850347118095, -0.001302024340141119, -0.001318401776445035, -0.001334985215011936, -0.001351777247011212, -0.001368780496204375, -0.001385997619355003, -0.001403431306643815, -0.001421084282088981, -0.001438959303971708, -0.001457059165267185, -0.00147538669408094, -0.001493944754090693, -0.001512736244993753, -0.00153176410296006, -0.001551031301090912, -0.001570540849883447, -0.001590295797701001, -0.001610299231249344, -0.001630554276058929, -0.001651064096973194, -0.001671831898643005, -0.001692860926027326, -0.001714154464900168, -0.00173571584236393, -0.001757548427369176, -0.001779655631240969, -0.001802040908211808, -0.001824707755961258, -0.001847659716162397, -0.001870900375035104, -0.001894433363906316, -0.001918262359777321, -0.001942391085898196, -0.001966823312349466, -0.001991562856631058, -0.002016613584258686, -0.002041979409367716, -0.002067664295324629, -0.002093672255346176, -0.002120007353126302, -0.002146673703470967, -0.002173675472940943, -0.002201016880502691, -0.002228702198187428, -0.002256735751758456, 
-0.002285121921386916, -0.002313865142336011, -0.002342969905653831, -0.002372440758874903, -0.002402282306730541, -0.00243249921186814, -0.002463096195579506, -0.002494078038538333, -0.002525449581546976, -0.002557215726292574, -0.002589381436112698, -0.002621951736770634, -0.002654931717240365, -0.002688326530501489, -0.002722141394344057, -0.00275638159218357, -0.002791052473886201, -0.002826159456604398, -0.002861708025622987, -0.00289770373521589, -0.002934152209513631, -0.002971059143381739, -0.003008430303310171, -0.003046271528313946, -0.003084588730845058, -0.003123387897715863, -0.003162675091034076, -0.003202456449149497, -0.003242738187612661, -0.003283526600145509, -0.003324828059624238, -0.003366649019074534, -0.003408996012679303, -0.003451875656799032, -0.00349529465100499, -0.003539259779125397, -0.003583777910304753, -0.003628856000076441, -0.003674501091448842, -0.003720720316005035, -0.003767520895016364, -0.003814910140569966, -0.003862895456710442, -0.003911484340595903, -0.003960684383668477, -0.004010503272839578, -0.004060948791689997, -0.004112028821685107, -0.004163751343405297, -0.004216124437791855, -0.004269156287408494, -0.004322855177718728, -0.004377229498379238, -0.004432287744549549, -0.004488038518218048, -0.004544490529544715, -0.004601652598220683, -0.004659533654844842, -0.004718142742317748, -0.00477748901725298, -0.004837581751406243, -0.004898430333122383, -0.004960044268800531, -0.00502243318437768, -0.005085606826830801, -0.005149575065697858, -0.00521434789461786, -0.005279935432890261, -0.00534634792705386, -0.005413595752485544, -0.00548168941501906, -0.005550639552584036, -0.00562045693686565, -0.005691152474984985, -0.005762737211200503, -0.005835222328630874, -0.005908619150999292, -0.00598293914439972, -0.006058193919085281, -0.006134395231278924, -0.006211554985006959, -0.006289685233955409, -0.006368798183349616, -0.006448906191857473, -0.006530021773516353, -0.006612157599684343, -0.006695326501015676, 
-0.006779541469461064, -0.006864815660292977, -0.006951162394156218, -0.007038595159144332, -0.007127127612901733, -0.007216773584752303, -0.007307547077854607, -0.007399462271383874, -0.007492533522741524, -0.007586775369792052, -0.007682202533128013, -0.007778829918363305, -0.007876672618454972, -0.007975745916054247, -0.008076065285886674, -0.008177646397162138, -0.00828050511601494, -0.008384657507974198, -0.008490119840465339, -0.008596908585342513, -0.00870504042145277, -0.008814532237232178, -0.008925401133334222, -0.009037664425291135, -0.009151339646208232, -0.009266444549491874, -0.009382997111611464, -0.009501015534895695, -0.009620518250363768, -0.009741523920591733, -0.009864051442614553, -0.009988119950864304, -0.01011374882014475, -0.01024095766864312, -0.01036976636097918, -0.01050019501129218, -0.0106322639863662, -0.01076599390879425, -0.01090140566018176, -0.01103852038438959, -0.01117735949081757, -0.01131794465772851, -0.01146029783561341, -0.01160444125059867, -0.01175039740789502, -0.01189818909528957, -0.01204783938668096, -0.01219937164565797, -0.01235280952912284, -0.01250817699095899, -0.01266549828574421, -0.01282479797250979, -0.0129861009185458, -0.01314943230325357, -0.01331481762204545, -0.01348228269029283, -0.01365185364732264, -0.01382355696046311, -0.01399741942913931, -0.01417346818901903, -0.01435173071620958, -0.01453223483150631, -0.01471500870469296, -0.0149000808588951, -0.0150874801749867, -0.01527723589605068, -0.01546937763189438, -0.0156639353636198, -0.01586093944825007, -0.01606042062341234, -0.0162624100120777, -0.01646693912735911, -0.01667403987736759, -0.0168837445701278, -0.01709608591855313, -0.01731109704548149, -0.01752881148877223, -0.01774926320646477, -0.01797248658199991, -0.01819851642950427, -0.01842738799913875, -0.01865913698251162, -0.01889379951815683, -0.01913141219707866, -0.01937201206836279, -0.0196156366448553, -0.01986232390890967, -0.02011211231820277, -0.02036504081162072, 
-0.0206211488152151, -0.02088047624823046, -0.02114306352920384, -0.02140895158213696, -0.02167818184274195, -0.02195079626476143, -0.02222683732636352, -0.02250634803661265, -0.02278937194201712, -0.02307595313315405, -0.02336613625137219, -0.02365996649557396, -0.02395748962907727, -0.02425875198655762, -0.02456380048107162, -0.02487268261116279, -0.02518544646804996, -0.0255021407428997, -0.02582281473418281, -0.0261475183551168, -0.02647630214119363, -0.02680921725779504, -0.02714631550789508, -0.02748764933985134, -0.02783327185528561, -0.02818323681705419, -0.02853759865730933, -0.02889641248565231, -0.02925973409737866, -0.029627619981817, -0.03000012733076148, -0.03037731404699901, -0.03075923875293216, -0.03114596079929789, -0.03153754027398381, -0.03193403801094154, -0.03233551559919907, -0.03274203539197192, -0.03315366051587412, -0.0335704548802301, -0.03399248318648752, -0.03441981093773198, -0.03485250444830444, -0.03529063085352178, -0.03573425811950108, -0.03618345505308844, -0.03663829131189267, -0.03709883741442477, -0.03756516475034316, -0.03803734559080597, -0.03851545309892977, -0.03899956134035659, -0.03948974529392865, -0.03998608086247154, -0.04048864488368592, -0.04099751514114904, -0.04151277037542496, -0.04203449029528501, -0.04256275558903837, -0.04309764793597204, -0.04363925001790207, -0.0441876455308346, -0.04474291919673758, -0.04530515677542325, -0.04587444507654068, -0.04645087197167892, -0.04703452640658045, -0.04762549841346458, -0.04822387912346023, -0.048829760779149, -0.04944323674721626, -0.05006440153121132, -0.05069335078441531, -0.05133018132281673, -0.05197499113819293, -0.05262787941129812, -0.05328894652515539, -0.05395829407845367, -0.05463602489904654, -0.05532224305755292, -0.05601705388105827, -0.05672056396691401, -0.05743288119663504, -0.05815411474989225, -0.05888437511859878, -0.05962377412108862, -0.06037242491638463, -0.0611304420185541, -0.06189794131114985, -0.06267504006173356, -0.06346185693647893, 
-0.06425851201485239, -0.06506512680436709, -0.0658818242554082, -0.06670872877612473, -0.06754596624738549, -0.0683936640377945, -0.06925195101876147, -0.07012095757962468, -0.07100081564281871, -0.0718916586790855, -0.07279362172272155, -0.0737068413868556, -0.0746314558787535, -0.07556760501514068, -0.07651543023753973, -0.07747507462761367, -0.07844668292250906, -0.07943040153019243, -0.08042637854477, -0.08143476376178557, -0.0824557086934863, -0.08348936658404747, -0.08453589242474828, -0.08559544296908779, -0.08666817674783057, -0.08775425408397343, -0.08885383710761949, -0.08996708977074937, -0.09109417786187772, -0.09223526902058031, -0.0933905327518808, -0.094560140440482, -0.09574426536482639, -0.09694308271097236, -0.09815676958626858, -0.09938550503281081, -0.1006294700406642, -0.1018888475608316, -0.1031638225179512, -0.1044545818227001, -0.1057613143838885, -0.1070842111202176, -0.1084234649716841, -0.1097792709106027, -0.1111518259522274, -0.1125413291649422, -0.1139479816799984, -0.1153719867007682, -0.1168135495114872, -0.1182728774854565, -0.119750180092673, -0.1212456689068554, -0.1227595576118325, -0.1242920620072599, -0.1258434000136265, -0.1274137916765158, -0.12900345917008, -0.130612626799689, -0.1322415210037085, -0.133890370354366, -0.1355594055576565, -0.1372488594522419, -0.1389589670072932, -0.1406899653192251, -0.1424420936072678, -0.1442155932078225, -0.1460107075675409, -0.1478276822350701, -0.1496667648514, -0.1515282051387471, -0.1534122548879106, -0.1553191679440291, -0.1572492001906645, -0.1592026095321414, -0.1611796558740599, -0.1631806011019043, -0.1652057090576627, -0.1672552455143697, -0.1693294781484832, -0.1714286765100019, -0.1735531119902246, -0.1757030577870537, -0.1778787888677362, -0.1800805819289374, -0.1823087153540296, -0.1845634691674873, -0.1868451249862632, -0.1891539659680255, -0.1914902767561205, -0.1938543434211368, -0.1962464533989241, -0.1986668954249309, -0.2011159594647072, -0.2035939366404271, 
-0.206101119153262, -0.2086378002014528, -0.2112042738939015, -0.2138008351591154, -0.2164277796493187, -0.2190854036395423, -0.2217740039215055, -0.2244938776920834, -0.2272453224361567, -0.2300286358036321, -0.2328441154804088, -0.235692059053069, -0.238572763867055, -0.2414865268780923, -0.2444336444966088, -0.2474144124248927, -0.250429125486723, -0.2534780774492021, -0.2565615608365041, -0.2596798667352515, -0.2628332845912189, -0.2660221019970599, -0.2692466044707352, -0.2725070752243154, -0.2758037949228286, -0.2791370414328019, -0.2825070895601434, -0.2859142107770003, -0.289358672937218, -0.292840739980018, -0.2963606716214938, -0.2999187230335265, -0.3035151445096988, -0.307150181117785, -0.3108240723383806, -0.3145370516892207, -0.3182893463347343, -0.3220811766803649, -0.3259127559511764, -0.329784289754264, -0.3336959756244577, -0.3376480025528226, -0.3416405504974292, -0.3456737898758639, -0.3497478810389471, -0.3538629737251067, -0.3580192064948514, -0.3622167061447774, -0.366455587100537, -0.3707359507881895, -0.3750578849833413, -0.3794214631374858, -0.3838267436809413, -0.3882737693017805, -0.3927625662001475, -0.3972931433173439, -0.4018654915390769, -0.406479582872248, -0.4111353695946764, -0.4158327833771369, -0.4205717343771075, -0.425352110303625, -0.4301737754526478, -0.4350365697123434, -0.439940307537721, -0.4448847768940384, -0.4498697381684585, -0.4548949230493978, -0.4599600333730819, -0.4650647399368136, -0.470208681278503, -0.4753914624220325, -0.4806126535880739, -0.4858717888700035, -0.4911683648746163, -0.496501839327367, -0.5018716296419579, -0.5072771114541091, -0.5127176171194436, -0.5181924341754658, -0.5237008037677114, -0.5292419190402136, -0.5348149234905267, -0.5404189092896486, -0.5460529155672894, -0.551715926663041, -0.5574068703441505, -0.5631246159906883, -0.5688679727491052, -0.5746356876552702, -0.580426443728285, -0.5862388580365369, -0.5920714797376238, -0.5979227880940188, -0.6037911904665361, -0.6096750202878874, 
-0.6155725350188808, -0.6214819140900549, -0.6274012568318292, -0.6333285803965416, -0.6392618176760391, -0.64519881521885, -0.6511373311512657, -0.6570750331070693, -0.6630094961710038, -0.6689382008414885, -0.6748585310185211, -0.6807677720231445, -0.6866631086553249, -0.6925416232975945, -0.6984002940723019, -0.704235993060867, -0.7100454845939989, -0.7158254236223955, -0.7215723541780829, -0.7272827079371468, -0.7329528028952828, -0.7385788421682519, -0.7441569129300231, -0.7496829855021079, -0.7551529126083109, -0.7605624288098816, -0.7659071501368059, -0.7711825739317871, -0.7763840789242118, -0.7815069255522463, -0.7865462565519789, -0.7914970978333642, -0.7963543596635082, -0.8011128381786493, -0.8057672172469786, -0.8103120707052166, -0.8147418649926145, -0.8190509622067503, -0.8232336236062126, -0.8272840135858341, -0.831196204150774, -0.8349641799162069, -0.838581843659839, -0.84204302245478, -0.8453414744105334, -0.848470896049985, -0.8514249303502299, -0.8541971754748838, -0.8567811942251783, -0.8591705242365741, -0.8613586889468499, -0.8633392093606297, -0.8651056166340018, -0.8666514655013475, -0.8679703485645858, -0.8690559114628051, -0.869901868937652, -0.8705020218067872, -0.8708502748542581, -0.8709406556426804, -0.870767334247616, -0.8703246439095257, -0.8696071025930157, -0.868609435436835, -0.8673265980711357, -0.8657538007708339, -0.8638865334054928, -0.8617205911369399, -0.8592521008057629, -0.8564775479369191, -0.853393804282864, -0.8499981558098411, -0.8462883310192525, -0.8422625294813335, -0.8379194504426367, -0.8332583213521291, -0.8282789261330987, -0.8229816330087756, -0.817367421671073, -0.811437909560093, -0.8051953770010566, -0.7986427909229605, -0.7917838268602155, -0.7846228889148787, -0.7771651273330166, -0.769416453324318, -0.7613835507296333, -0.7530738841166246, -0.7444957028597604, -0.735658040737581, -0.7265707105579726, -0.7172442933013934, -0.7076901212531342, -0.6979202545792372, -0.6879474507872491, -0.6777851265030141, 
-0.6674473109892195, -0.65694859083071, -0.6463040452168827, -0.6355291712634507, -0.6246397988354357, -0.6136519943615901, -0.6025819531684552, -0.5914458799111482, -0.58025985673897, -0.5690396989081047, -0.5578007976425018, -0.5465579501485941, -0.5353251768111694, -0.5241155257378475, -0.5129408649791881, -0.501811662931955, -0.4907367576352787, -0.4797231158944765, -0.4687755834158701, -0.4578973766033018, -0.4470929735673043, -0.4363673049480081, -0.4257252118896889, -0.4151714395328931, -0.4047106305872581, -0.3943473190043328, -0.3840859237705698, -0.3739307428414719, -0.3638859472386361, -0.3539555753321196, -0.3441435275206222, -0.3344535625153297, -0.3248892891880326, -0.31545416304445, -0.3061514819404585, -0.2969843819654103, -0.2879558336717769, -0.2790686386712498, -0.270325426615572, -0.2617286525780794, -0.2532805948492306, -0.2449833531563117, -0.2368388473141131, -0.2288488163097291, -0.2210148178208825, -0.2133382281633876, -0.205820242659697, -0.198461876417049, -0.1912639655006305, -0.1842271684845372, -0.1773519683611706, -0.1706386747881507, -0.1640874266508341, -0.1576981949180893, -0.1514707857690787, -0.1454048439693406, -0.1394998564753841, -0.1337551562482223, -0.1281699262576697, -0.1227432036607411, -0.1174738841390258, -0.1123607263813997, -0.1074023566998332, -0.1025972737672946, -0.0979438534678277, -0.09344035384976498, -0.08908492017372939, -0.08487559004757941, -0.08081029864076526, -0.07688688397072489, -0.07310309225395217, -0.06945658331425632, -0.06594493604051604, -0.06256565388593385, -0.05931617040044393, -0.05619385478753621, -0.05319601747634991, -0.0503199156994745, -0.04756275906650127, -0.04492171512299277, -0.04239391488419683, -0.03997645833254768, -0.03766641986775183, -0.03546085369808282, -0.03335679916139504, -0.0313512859643206, -0.02944133932813931, -0.02762398502990989, -0.02589625432761974, -0.02425518875835029, -0.02269784479876991, -0.02122129837764429, -0.01982264923050474, -0.01849902508712039, 
-0.01724758568299051, -0.01606552658670199, -0.01495008283566477, -0.01389853237346739, -0.01290819928284725, -0.01197645680907974, -0.01110073016940329, -0.0102784991449619, -0.009507300452599808, -0.00878472989474333, -0.008108444286470748, -0.007476163159773983, -0.006885670245883601, -0.006334814737404701, -0.005821512332854752, -0.005343746067038896, -0.004899566931463392, -0.004487094289785684, -0.004104516094018836, -0.003750088907875495, -0.003422137744299464, -0.003119055724795327, -0.002839303568738726, -0.002581408921272121, -0.002343965612829745, -0.002125632441830194, -0.001925132323145202, -0.001741250981479139, -0.001572835614565252, -0.001418793453547422, -0.001278090230864552, -0.001149748565951618, -0.001032846279010827, -0.0009265146429898077, -0.000829936583734207, -0.0007423448380620148, -0.0006630200792396328, -0.0005912890190286408, -0.0005265224951209876, -0.0004681335523929222, -0.0004155755259884479, -0.0003683401337956041, -0.000325955585407818, -0.0002879847141721743, -0.0002540231384211536, -0.0002236974574684145, -0.0001966634874267993, -0.0001726045413820382, -0.0001512297579325209, -0.0001322724815878168, -0.0001154886980098645, -0.0001006555265842501, -8.756977232779733e-05, -7.604653867555174e-05, -6.591790224764401e-05, -5.70316502766019e-05, -4.925008098029639e-05, -4.244886679637852e-05, -3.651598005197754e-05, -3.135068032848763e-05, -2.686256249603502e-05, -2.297066413596826e-05, -1.960263084245008e-05, -1.669393769566789e-05, -1.418716502879952e-05, -1.203132646791737e-05, -1.018124710752374e-05, -8.596989593242441e-06, -7.243325816039448e-06, -6.08925187777474e-06, -5.107543964349964e-06, -4.274352758583521e-06, -3.568834038526117e-06, -2.972813136499502e-06, -2.470480977959836e-06, -2.048119475621659e-06, -1.693854121422992e-06, -1.397431695204311e-06, -1.15002109281152e-06, -9.44035366186721e-07, -7.729731624364527e-07, -6.312778465155973e-07, -5.142126917891545e-07, -4.177506231932154e-07, -3.384770979819198e-07, 
-2.735048082022627e-07, -2.203989862793338e-07, -1.771121897277343e-07, -1.419275324450958e-07, -1.134094178090199e-07, -9.036091250572266e-08, -7.178697937568346e-08, -5.686286236235569e-08, -4.490698676606287e-08, -3.53578033783994e-08, -2.775406570696427e-08, -2.171808545483415e-08, -1.694156279939137e-08, -1.317363496584055e-08, -1.021082928901561e-08, -7.888645605332418e-09, -6.074527636557793e-09, -4.662014253419658e-09, -3.565889383174636e-09, -2.71817409791549e-09, -2.064826336103247e-09, -1.56303301121192e-09, -1.178996184474119e-09, -8.861297505085153e-10, -6.635959212549861e-10, -4.951218990885453e-10, -3.680466950077778e-10, -2.72556249420049e-10, -2.010720142772116e-10, -1.477641051516428e-10, -1.081651655282164e-10, -7.886532505320191e-11, -5.727218793054724e-11, -4.142275427156264e-11, -2.983664192875247e-11, -2.140201471678725e-11, -1.528730093692767e-11, -1.087316108826641e-11, -7.700285051799253e-12, -5.429509344746697e-12, -3.811480510183785e-12, -2.663682088520683e-12, -1.853115863678723e-12, -1.283304959545495e-12, -8.845850438108561e-13, -6.068854427166951e-13, -4.14386809082069e-13, -2.815863105312856e-13, -1.904134289630391e-13, -1.281261285979713e-13, -8.57837959661591e-14, -5.714434117179601e-14, -3.787165220438465e-14, -2.496896904703374e-14, -1.637586432122985e-14, -1.068308476806825e-14, -6.931849589321237e-15, -4.473345190029624e-15, -2.870892566951851e-15, -1.832199342861873e-15, -1.162704344826699e-15, -7.336274779140829e-16, -4.602137435480737e-16, -2.870036271704963e-16, -1.779183556763071e-16, -1.096261517020342e-16, -6.713626402075545e-17, -4.086207575564687e-17, -2.471554282589801e-17, -1.485498259044479e-17, -8.871391838293797e-18, -5.26373372953253e-18, -3.102723426670837e-18, -1.816783740552066e-18, -1.056666590291904e-18, -6.103941732774315e-19, -3.501733264884258e-19, -1.994886371619825e-19, -1.128437055535844e-19, -6.337555161606953e-20, -3.533556737364373e-20, -1.955727981587821e-20, -1.0744112378043e-20, 
-5.858109381989755e-21, -3.169770763349203e-21, -1.701922373642684e-21, -9.066729954746004e-22, -4.792009090810614e-22, -2.512447677554877e-22, -1.306606203362766e-22, -6.73931854773507e-23, -3.447192483051058e-23, -1.748426880240217e-23, -8.792559549380176e-24, -4.383509668497278e-24, -2.166305551579044e-24, -1.061109518072868e-24, -5.151034815087648e-25, -2.477838040420325e-25, -1.180987293121375e-25, -5.576484089266185e-26, -2.608356944868005e-26, -1.20840519184124e-26, -5.544267027144122e-27, -2.518882878052408e-27, -1.133051497204599e-27, -5.045613606391362e-28, -2.224052703014379e-28, -9.702557503516154e-29, -4.188707070097841e-29, -1.789238068755999e-29, -7.56121535496772e-30, -3.160753086486405e-30, -1.306785592362018e-30, -5.342830326171278e-31, -2.159878489516088e-31, -8.632068751483211e-32, -3.410075363710469e-32, -1.331409837503723e-32, -5.13681539102802e-33, -1.95819328813451e-33, -7.375880673986779e-34, -2.748672654379422e-34, -1.024093211538311e-34, -3.768153545027147e-35, -1.369058679286161e-35, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "label": "1S", + "angular_momentum": 0 + }, + { + "occupation": 1.0, + "radial_function": [ + -0.0004047180474683475, -0.0004098087706731582, -0.0004149635271730114, -0.000420183122403017, -0.0004254683719292356, -0.0004308201015761035, -0.0004362391475554619, -0.0004417263565972048, -0.0004472825860815736, -0.0004529087041731148, -0.0004586055899563203, -0.0004643741335729751, -0.0004702152363612331, -0.0004761298109964414, -0.000482118781633734, -0.000488183084052421, -0.0004943236658021913, -0.0005005414863511571, -0.0005068375172357576, -0.0005132127422125473, -0.0005196681574118964, -0.0005262047714936193, 
-0.0005328236058045662, -0.00053952569453819, -0.0005463120848961272, -0.0005531838372518006, -0.0005601420253160911, -0.000567187736305084, -0.0005743220711099257, -0.0005815461444688235, -0.0005888610851411997, -0.0005962680360840392, -0.0006037681546304558, -0.0006113626126704996, -0.0006190525968342442, -0.0006268393086771728, -0.0006347239648678913, -0.0006427077973782091, -0.0006507920536756044, -0.0006589779969181161, -0.0006672669061516773, -0.0006756600765099355, -0.0006841588194165851, -0.0006927644627902413, -0.0007014783512518967, -0.0007103018463349771, -0.0007192363266980427, -0.0007282831883401637, -0.0007374438448190038, -0.000746719727471645, -0.00075611228563819, -0.0007656229868881762, -0.0007752533172498299, -0.0007850047814422076, -0.000794878903110264, -0.000804877225062857, -0.0008150013095137592, -0.0008252527383256921, -0.0008356331132574333, -0.0008461440562140294, -0.0008567872095001457, -0.00086756423607661, -0.0008784768198201809, -0.0008895266657865747, -0.000900715500476808, -0.0009120450721068757, -0.0009235171508808313, -0.000935133529267292, -0.000946896022279422, -0.0009588064677584339, -0.0009708667266606521, -0.0009830786833481882, -0.0009954442458832635, -0.001007965346326237, -0.001020643941037377, -0.001033482010982419, -0.001046481562041969, -0.001059644625324795, -0.00107297325748506, -0.001086469541043534, -0.001100135584712833, -0.001113973523726774, -0.001127985520173837, -0.001142173763334834, -0.001156540470024821, -0.001171087884939295, -0.001185818281004754, -0.001200733959733645, -0.001215837251583785, -0.001231130516322301, -0.001246616143394121, -0.00126229655229511, -0.001278174192949888, -0.0012942515460944, -0.001310531123663283, -0.001327015469182104, -0.001343707158164516, -0.001360608798514412, -0.00137772303093312, -0.001395052529331728, -0.00141260000124856, -0.001430368188271919, -0.001448359866468128, -0.001466577846814941, -0.001485024975640407, -0.001503704135067227, -0.00152261824346271, 
-0.001541770255894363, -0.001561163164591199, -0.001580799999410853, -0.001600683828312533, -0.00162081775783593, -0.001641204933586129, -0.001661848540724604, -0.001682751804466387, -0.001703917990583449, -0.001725350405914425, -0.00174705239888072, -0.001769027360009087, -0.001791278722460779, -0.001813809962567306, -0.001836624600372941, -0.00185972620018402, -0.001883118371125131, -0.001906804767702277, -0.001930789090373093, -0.001955075086124234, -0.001979666549055975, -0.002004567320974159, -0.002029781291989564, -0.00205531240112476, -0.00208116463692861, -0.00210734203809843, -0.002133848694109972, -0.002160688745855301, -0.002187866386288643, -0.002215385861080337, -0.002243251469278985, -0.002271467563981879, -0.002300038553013833, -0.002328968899614504, -0.002358263123134331, -0.002387925799739191, -0.002417961563123847, -0.002448375105234357, -0.002479171176999502, -0.002510354589071369, -0.002541930212575204, -0.002573902979868639, -0.002606277885310433, -0.002639059986038783, -0.002672254402759407, -0.002705866320543474, -0.002739900989635486, -0.00277436372627127, -0.002809259913506187, -0.0028445950020537, -0.002880374511134398, -0.002916604029335616, -0.002953289215481839, -0.002990435799515905, -0.003028049583391248, -0.003066136441975264, -0.003104702323963942, -0.003143753252807922, -0.003183295327650078, -0.003223334724274815, -0.00326387769606917, -0.003304930574995923, -0.003346499772578823, -0.003388591780900046, -0.003431213173610162, -0.003474370606950581, -0.003518070820788791, -0.003562320639666459, -0.003607126973860543, -0.003652496820457673, -0.003698437264441795, -0.00374495547979541, -0.003792058730614464, -0.003839754372237079, -0.003888049852386337, -0.003936952712327198, -0.00398647058803783, -0.004036611211395444, -0.004087382411376785, -0.004138792115273624, -0.004190848349923188, -0.004243559242953911, -0.004296933024046585, -0.004350978026211092, -0.004405702687079007, -0.004461115550212078, -0.004517225266426973, 
-0.004574040595136389, -0.004631570405706636, -0.004689823678832094, -0.004748809507926543, -0.004808537100531684, -0.004869015779743057, -0.004930254985653429, -0.00499226427681406, -0.005055053331713888, -0.005118631950276927, -0.005183010055378059, -0.005248197694377435, -0.005314205040673783, -0.005381042395276674, -0.005448720188398144, -0.005517248981063809, -0.005586639466743613, -0.005656902473002701, -0.005728048963172317, -0.005800090038041182, -0.005873036937567583, -0.00594690104261221, -0.006021693876692281, -0.006097427107756888, -0.006174112549984007, -0.006251762165599409, -0.006330388066717541, -0.006410002517204918, -0.006490617934565954, -0.00657224689185172, -0.006654902119591827, -0.006738596507749568, -0.006823343107700774, -0.006909155134236494, -0.006996045967589785, -0.007084029155487019, -0.007173118415223652, -0.007263327635765158, -0.007354670879872953, -0.007447162386255916, -0.007540816571747658, -0.00763564803350966, -0.007731671551260914, -0.007828902089533912, -0.007927354799957646, -0.008027045023567669, -0.008127988293143503, -0.008230200335573883, -0.008333697074249776, -0.008438494631485736, -0.008544609330969876, -0.008652057700242518, -0.008760856473204048, -0.008871022592652123, -0.008982573212848578, -0.00909552570211627, -0.009209897645466189, -0.009325706847255203, -0.009442971333874492, -0.009561709356469201, -0.00968193939368949, -0.009803680154473261, -0.009926950580860924, -0.01005176985084236, -0.01017815738123653, -0.01030613283060392, -0.01043571610219203, -0.0105669273469144, -0.01069978696636325, -0.01083431561585614, -0.01097053420751693, -0.01110846391339114, -0.01124812616859626, -0.01138954267450697, -0.0115327354019759, -0.01167772659458985, -0.01182453877196196, -0.01197319473305998, -0.01212371755957088, -0.01227613061930218, -0.01243045756961998, -0.01258672236092437, -0.01274494924016189, -0.01290516275437575, -0.01306738775429379, -0.01323164939795442, -0.01339797315437075, -0.01356638480723321, 
-0.0137369104586507, -0.01390957653293053, -0.01408440978039735, -0.01426143728125105, -0.01444068644946423, -0.01462218503671866, -0.01480596113638164, -0.01499204318752183, -0.01518045997896485, -0.01537124065338883, -0.01556441471145973, -0.01576001201600673, -0.01595806279623766, -0.0161585976519944, -0.01636164755804852, -0.01656724386843676, -0.01677541832083672, -0.01698620304098233, -0.01719963054711942, -0.01741573375450113, -0.01763454597992271, -0.01785610094629629, -0.01808043278726473, -0.01830757605185479, -0.01853756570916933, -0.01877043715311802, -0.01900622620718672, -0.01924496912924482, -0.01948670261639037, -0.01973146380983266, -0.01997929029981164, -0.02023022013055384, -0.02048429180526435, -0.02074154429115409, -0.02100201702450205, -0.02126574991575165, -0.02153278335464045, -0.02180315821536308, -0.02207691586176542, -0.02235409815257064, -0.02263474744663484, -0.02291890660823237, -0.02320661901236905, -0.0234979285501227, -0.02379287963400946, -0.02409151720337494, -0.02439388672980857, -0.02470003422258018, -0.02501000623409715, -0.02532384986538005, -0.0256416127715563, -0.0259633431673688, -0.0262890898326988, -0.02661890211810037, -0.0269528299503446, -0.02729092383797166, -0.02763323487684817, -0.02797981475572756, -0.02833071576181116, -0.02868599078630707, -0.02904569332998417, -0.02940987750871844, -0.02977859805902868, -0.03015191034359845, -0.03052987035678026, -0.03091253473007951, -0.03129996073761382, -0.03169220630154432, -0.03208932999747469, -0.03249139105981356, -0.03289844938709624, -0.03331056554726124, -0.03372780078287645, -0.03415021701631007, -0.03457787685484101, -0.03501084359570353, -0.03544918123106027, -0.03589295445289736, -0.03634222865783583, -0.03679706995185254, -0.0372575451549039, -0.03772372180544538, -0.03819566816483919, -0.03867345322164274, -0.0391571466957691, -0.03964681904251233, -0.04014254145642738, -0.04064438587505696, -0.04115242498249461, -0.04166673221277483, -0.04218738175307954, 
-0.04271444854675064, -0.04324800829609695, -0.0437881374649841, -0.0443349132811949, -0.04488841373854855, -0.04544871759876423, -0.04601590439305631, -0.04659005442344683, -0.04717124876378003, -0.04775956926042387, -0.04835509853264275, -0.04895791997262393, -0.04956811774514148, -0.05018577678683909, -0.05081098280511254, -0.05144382227657376, -0.05208438244507485, -0.05273275131927251, -0.05338901766970945, -0.05405327102539116, -0.05472560166983453, -0.0554061006365633, -0.05609485970402529, -0.05679197138990538, -0.05749752894480621, -0.05821162634526836, -0.05893435828610133, -0.05966582017199311, -0.06040610810836849, -0.06115531889146138, -0.06191354999756832, -0.06268089957144694, -0.06345746641382324, -0.06424334996796863, -0.0650386503053077, -0.06584346811001644, -0.066657904662567, -0.06748206182217581, -0.06831604200810942, -0.06915994817979979, -0.07001388381572207, -0.07087795289098152, -0.07175225985355888, -0.07263690959915875, -0.07353200744460474, -0.07443765909972279, -0.0753539706376536, -0.0762810484635286, -0.07721899928144788, -0.07816793005969079, -0.0791279479940906, -0.0800991604695021, -0.08108167501928604, -0.08207559928273654, -0.08308104096036861, -0.08409810776698667, -0.08512690738244802, -0.08616754740003289, -0.08722013527233266, -0.08828477825455984, -0.08936158334518589, -0.09045065722380573, -0.09155210618612518, -0.09266603607596761, -0.09379255221418735, -0.09493175932437797, -0.09608376145525854, -0.09724866189961798, -0.09842656310969222, -0.09961756660884738, -0.1008217728994366, -0.1020392813666945, -0.1032701901785299, -0.1045145961810734, -0.1057725947898296, -0.1070442798762841, -0.1083297436498069, -0.1096290765346905, -0.1109423670421588, -0.1122697016371752, -0.1136111645998752, -0.1149668378814431, -0.1163368009542514, -0.1177211306560708, -0.119119901028157, -0.1205331831470189, -0.1219610449496607, -0.1234035510520926, -0.1248607625608951, -0.1263327368776191, -0.1278195274957996, -0.1293211837903543, 
-0.130837750799136, -0.1323692689964002, -0.1339157740579464, -0.1354772966176885, -0.1370538620154012, -0.1386454900353888, -0.1402521946358179, -0.141873983668449, -0.1435108585885022, -0.1451628141543867, -0.1468298381170191, -0.1485119108984571, -0.150209005259566, -0.1519210859564404, -0.1536481093852965, -0.1553900232155515, -0.1571467660108034, -0.1589182668374288, -0.1607044448605131, -0.1625052089268247, -0.1643204571345584, -0.1661500763895619, -0.167993941947773, -0.1698519169435921, -0.1717238519039247, -0.173609584247634, -0.1755089377701488, -0.1774217221129811, -0.1793477322179222, -0.1812867477656931, -0.1832385325988359, -0.185202834128657, -0.1871793827260368, -0.1891678910959517, -0.1911680536355627, -0.1931795457757577, -0.1952020233060516, -0.1972351216827825, -0.1992784553205639, -0.2013316168669957, -0.2033941764606639, -0.2054656809725012, -0.207545653230625, -0.2096335912288106, -0.2117289673188099, -0.2138312273867789, -0.2159397900141332, -0.2180540456232151, -0.2201733556082204, -0.2222970514519061, -0.2244244338286742, -0.2265547716947137, -0.228687301365962, -0.2308212255847509, -0.2329557125760941, -0.2350898950946828, -0.237222869463767, -0.239353694607221, -0.2414813910762207, -0.2436049400720915, -0.2457232824670314, -0.2478353178245629, -0.2499399034217264, -0.2520358532751991, -0.2541219371736985, -0.2561968797192116, -0.2582593593797975, -0.2603080075569059, -0.2623414076703748, -0.2643580942644956, -0.2663565521387696, -0.268335215507227, -0.2702924671904279, -0.2722266378445499, -0.2741360052322217, -0.27601879354007, -0.2778731727482273, -0.2796972580573673, -0.2814891093791507, -0.2832467308962862, -0.2849680706987421, -0.2866510205030052, -0.2882934154616075, -0.289893034070524, -0.2914475981823847, -0.292954773133826, -0.2944121679956624, -0.2958173359549424, -0.2971677748383047, -0.298460927786438, -0.2996941840897876, -0.3008648801960178, -0.3019703009000859, -0.3030076807280972, -0.3039742055264425, -0.3048670142679952, 
-0.3056832010874199, -0.3064198175578838, -0.3070738752216575, -0.307642348387253, -0.30812217720588, -0.3085102710400377, -0.3088035121370877, -0.3089987596205797, -0.30909285381197, -0.3090826208951465, -0.3089648779358644, -0.3087364382677915, -0.3083941172563213, -0.3079347384506689, -0.3073551401339915, -0.3066521822803303, -0.3058227539260954, -0.3048637809625557, -0.3037722343543475, -0.3025451387873828, -0.3011795817476854, -0.2996727230305993, -0.2980218046775043, -0.2962241613345932, -0.2942772310254286, -0.2921785663258747, -0.2899258459265812, -0.2875168865644667, -0.2849496553006036, -0.2822222821175404, -0.2793330728043668, -0.2762805220927959, -0.2730633270021038, -0.2696804003450348, -0.2661308843406705, -0.2624141642738174, -0.2585298821336927, -0.2544779501576089, -0.2502585641979651, -0.2458722168232228, -0.2413197100556512, -0.236602167640589, -0.2317210467337614, -0.2266781488849486, -0.2214756301880385, -0.2161160104593484, -0.2106021812981358, -0.2049374128755541, -0.199125359291107, -0.1931700623290088, -0.1870759534409766, -0.1808478537770292, -0.1744909720820232, -0.1680109002732027, -0.161413606513131, -0.1547054255933455, -0.1478930464471789, -0.1409834966157062, -0.1339841234990784, -0.1269025722368619, -0.1197467600758475, -0.1125248471024185, -0.1052452032394047, -0.09791637143477999, -0.09054702700186756, -0.08314593310858777, -0.07572189245659812, -0.06828369524073956, -0.06084006353510778, -0.05339959231463585, -0.04597068739052276, -0.03856150061435096, -0.03117986278933039, -0.02383321481778782, -0.01652853771159627, -0.009272282196399372, -0.002070298750789188, 0.005072230962720872, 0.01215086019783565, 0.01916193955506397, 0.02610267441657795, 0.03297117593351002, 0.03976650399016462, 0.04648870045030286, 0.05313881087869429, 0.05971889282974536, 0.06623200870792928, 0.07268220113629084, 0.07907444872395522, 0.08541460010665457, 0.09170928415124743, 0.09796579427207551, 0.1041919449102225, 0.1103958983828661, 
0.1165859605259801, 0.1227703438366075, 0.1289568971777292, 0.1351528015463223, 0.1413642319296883, 0.1475959858924502, 0.1538510802513709, 0.1601303180108203, 0.1664321815649648, 0.172754548515615, 0.1790953559987464, 0.1854525084996076, 0.191823878612876, 0.1982073077550141, 0.2046006068174845, 0.2110015567494221, 0.217407909058575, 0.2238173862196883, 0.2302276819802266, 0.2366364617751937, 0.2430413645799082, 0.2494399988409274, 0.2558299440897095, 0.2622087508599952, 0.268573940252294, 0.2749230033764877, 0.2812534006815165, 0.2875625611848145, 0.2938478816178791, 0.3001067255079748, 0.3063364222193323, 0.3125342659801633, 0.3186975149242336, 0.3248233901775011, 0.3309090750213208, 0.3369517141639165, 0.3429484131511863, 0.3488962379464448, 0.3547922147065419, 0.3606333297789659, 0.3664165299412238, 0.3721387229001206, 0.3777967780646735, 0.383387527602491, 0.3889077677856119, 0.3943542606281866, 0.399723735815073, 0.4050128929174632, 0.4102184038891448, 0.4153369158348598, 0.4203650540405386, 0.4252994252538773, 0.4301366212027625, 0.4348732223384146, 0.4395058017897216, 0.4440309295150929, 0.4484451766381568, 0.4527451199537488, 0.4569273465908843, 0.4609884588196705, 0.464925078989447, 0.4687338545857537, 0.4724114633940671, 0.4759546187585343, 0.4793600749242203, 0.4826246324516419, 0.4857451436925623, 0.4887185183162128, 0.4915417288752523, 0.4942118164008836, 0.4967258960166359, 0.4990811625603783, 0.5012748962041524, 0.5033044680614338, 0.5051673457714101, 0.5068610990498508, 0.5083834051961044, 0.5097320545456983, 0.5109049558579861, 0.5119001416282046, 0.5127157733132588, 0.5133501464604759, 0.5138016957285064, 0.5140689997895039, 0.5141507861016252, 0.5140459355408696, 0.5137534868812096, 0.5132726411119141, 0.5126027655809539, 0.5117433979533202, 0.5106942499730975, 0.5094552110181056, 0.5080263514359318, 0.5064079256502017, 0.5046003750259582, 0.5026043304830642, 0.500420614846622, 0.4980502449234672, 0.4954944332939185, 0.492754589808083, 
0.4898323227761735, 0.4867294398424785, 0.4834479485328363, 0.4799900564657253, 0.4763581712173438, 0.4725548998314026, 0.4685830479647204, 0.4644456186601457, 0.4601458107385643, 0.4556870168029296, 0.4510728208469253, 0.4463069954621955, 0.441393498638506, 0.4363364701520178, 0.4311402275377015, 0.4258092616428236, 0.4203482317594348, 0.4147619603348462, 0.4090554272602376, 0.4032337637387457, 0.3973022457356914, 0.3912662870149731, 0.3851314317671102, 0.3789033468359423, 0.3725878135525776, 0.3661907191868513, 0.3597180480282567, 0.3531758721100842, 0.3465703415923236, 0.3399076748207083, 0.3331941480811771, 0.3264360850708904, 0.3196398461088377, 0.3128118171109474, 0.3059583983564463, 0.2990859930740426, 0.2922009958782442, 0.2853097810878116, 0.2784186909599419, 0.2715340238752514, 0.2646620225100107, 0.2578088620332948, 0.2509806383677882, 0.2441833565538892, 0.2374229192574571, 0.2307051154620738, 0.2240356093869748, 0.2174199296718801, 0.2108634588697961, 0.2043714232884349, 0.1979488832202503, 0.1916007236001555, 0.1853316451288179, 0.1791461558979874, 0.1730485635526137, 0.167042968022571, 0.1611332548546066, 0.1553230891727129, 0.1496159102924676, 0.1440149270120327, 0.1385231135994598, 0.1331432064927266, 0.1278777017255674, 0.1227288530886685, 0.1176986710322006, 0.1127889223120025, 0.1080011303780045, 0.1033365764997532, 0.09879630162017676, 0.09438110892504017, 0.09009156711193142, 0.08592801433909603, 0.081890562831046, 0.0779791041146292, 0.07419331485617704, 0.07053266326748882, 0.0669964160457691, 0.06358364581023784, 0.0602932389959968, 0.05712390416387289, 0.05407418068338834, 0.05114244774472838, 0.04832693365460623, 0.04562572537025991, 0.04303677822545875, 0.040557925802347, 0.03818688990320378, 0.03592129057674331, 0.0337586561544086, 0.03169643325321134, 0.02973199670302835, 0.02786265935785965, 0.02608568175237155, 0.02439828156706603, 0.02279764286761413, 0.02128092508624753, 0.01984527171558935, 0.01848781868790444, 
0.01720570241543542, 0.01599606747023647, 0.01485607388470585, 0.01378290405681877, 0.01277376924686034, 0.01182591565522641, 0.01093663007358177, 0.01010324510432007, 0.009323143945840303, 0.008593764743625557, 0.007912604509466405, 0.007277222613400443, 0.006685243855031674, 0.006134361122837723, 0.00562233765186352, 0.005147008891830034, 0.004706283999152607, 0.00429814696766282, 0.003920657413959476, 0.003571951034279147, 0.003250239750576197, 0.00295381156413968, 0.002681030135554193, 0.00243033411013844, 0.002200236208175431, 0.001989322099288134, 0.001796249080221382, 0.001619744575072726, 0.001458604476679062, 0.001311691347420871, 0.001177932497159028, 0.001056317955378015, 0.0009458983537503509, 0.0008457827338600094, 0.0007551362992082466, 0.0006731838905309757, 0.0005992534259685314, 0.0005326621438724496, 0.0004727697083232533, 0.0004189824771834697, 0.0003707509650875269, 0.0003275673696159581, 0.0002889631653336867, 0.0002545067697947576, 0.0002238012850458619, 0.0001964823176017274, 0.0001722158793201152, 0.0001506963710757675, 0.0001316446506238216, 0.0001148061855564439, 9.994929179384531e-05, 8.686345761429113e-05, 7.53577528187869e-05, 6.525932224604763e-05, 5.641196250312822e-05, 4.867478045736759e-05, 4.192093174646178e-05, 3.603643730565183e-05, 3.091907568401725e-05, 2.647734872531323e-05, 2.262951802203067e-05, 1.930270941354177e-05, 1.643208268927437e-05, 1.396006357461032e-05, 1.183563501926175e-05, 1.001368477374075e-05, 8.454406227603782e-06, 7.122749491735682e-06, 5.987919734297257e-06, 5.022919824314894e-06, 4.204134396444461e-06, 3.510952523403098e-06, 2.925426267162605e-06, 2.431962474506746e-06, 2.017045285298716e-06, 1.668986931178675e-06, 1.377704516890666e-06, 1.134520594591152e-06, 9.319854619841313e-07, 7.637192367343349e-07, 6.242718812145695e-07, 5.089994722457534e-07, 4.13955129205232e-07, 3.35793129945405e-07, 2.716848567340491e-07, 2.192453233716686e-07, 1.764691393499201e-07, 1.416748670977804e-07, 1.134568238488418e-07, 
9.064347041014222e-08, 7.226161521102554e-08, 5.750574370004551e-08, 4.571186085249669e-08, 3.63353088741845e-08, 2.879661874231964e-08, 2.275362613924699e-08, 1.792420133411286e-08, 1.407642820173664e-08, 1.102025804015029e-08, 8.600439837433884e-09, 6.690551486101325e-09, 5.187977257946504e-09, 4.009695724859758e-09, 3.088759342740541e-09, 2.371362214688014e-09, 1.814406229363086e-09, 1.383487946522599e-09, 1.051239390488143e-09, 7.959654287874261e-10, 6.005287698003106e-10, 4.514409185106336e-10, 3.381237871621783e-10, 2.523121659661005e-10, 1.875720101764054e-10, 1.389135796139088e-10, 1.024819542782883e-10, 7.531041762975599e-11, 5.512471349973449e-11, 4.018830309767936e-11, 2.918052880755029e-11, 2.110107920880101e-11, 1.519538737408634e-11, 1.089661930046855e-11, 7.780753986388957e-12, 5.531949596685334e-12, 3.91595595009995e-12, 2.759793362823354e-12, 1.936289766111608e-12, 1.352367353591875e-12, 9.402097930309145e-13, 6.50632173040552e-13, 4.481274676669418e-13, 3.071829683530018e-13, 2.095541130199858e-13, 1.422565777386437e-13, 9.60945871471861e-14, 6.458755579965547e-14, 4.31911477255035e-14, 2.873484643488628e-14, 1.901792710535166e-14, 1.252071721419741e-14, 8.199326430149985e-15, 5.340487917873029e-15, 3.459457258593633e-15, 2.228585933722011e-15, 1.42762754645769e-15, 9.093559782103525e-16, 5.759110020647142e-16, 3.626167365738667e-16, 2.269757811191032e-16, 1.41227145649093e-16, 8.734362458654064e-17, 5.368903195900707e-17, 3.279803491312142e-17, 1.991058834908209e-17, 1.201047236691307e-17, 7.198484658993442e-18, 4.28639230646702e-18, 2.535578053847545e-18, 1.489910681658772e-18, 8.69571225266817e-19, 5.040513694777854e-19, 2.901561104693854e-19, 1.658586116614127e-19 + ], + "label": "2S", + "angular_momentum": 0 + } + ], + "total_charge_density": [ + 5.146417287956111e-06, 5.276699378931302e-06, 5.410279572489363e-06, 5.547241360276649e-06, 5.687670347529454e-06, 5.831654306579173e-06, 5.979283231711996e-06, 6.130649395417336e-06, 
6.285847406060122e-06, 6.444974267013132e-06, 6.608129437286095e-06, 6.775414893689705e-06, 6.946935194573228e-06, 7.122797545175608e-06, 7.303111864630941e-06, 7.48799085467003e-06, 7.677550070061233e-06, 7.871907990834375e-06, 8.071186096332993e-06, 8.275508941141227e-06, 8.485004232932612e-06, 8.699802912289676e-06, 8.92003923454409e-06, 9.145850853688519e-06, 9.377378908412684e-06, 9.614768110317272e-06, 9.858166834361011e-06, 1.010772721159729e-05, 1.036360522425831e-05, 1.062596080324633e-05, 1.089495792809262e-05, 1.11707647294469e-05, 1.145355359416128e-05, 1.174350127303408e-05, 1.204078899128137e-05, 1.23456025618047e-05, 1.265813250132623e-05, 1.297857414946374e-05, 1.330712779081973e-05, 1.364399878016132e-05, 1.398939767076861e-05, 1.43435403460323e-05, 1.470664815438237e-05, 1.507894804763244e-05, 1.546067272282611e-05, 1.585206076767376e-05, 1.625335680967118e-05, 1.666481166899271e-05, 1.708668251525476e-05, 1.751923302824756e-05, 1.796273356273545e-05, 1.841746131742905e-05, 1.888370050823452e-05, 1.936174254588833e-05, 1.985188621808886e-05, 2.035443787623777e-05, 2.086971162690892e-05, 2.139802952816375e-05, 2.193972179083596e-05, 2.249512698491155e-05, 2.306459225113262e-05, 2.364847351795774e-05, 2.424713572401421e-05, 2.4860953046181e-05, 2.549030913344551e-05, 2.613559734667934e-05, 2.679722100448384e-05, 2.747559363525855e-05, 2.817113923564999e-05, 2.888429253554274e-05, 2.961549926975759e-05, 3.036521645662749e-05, 3.113391268362466e-05, 3.192206840021741e-05, 3.27301762181403e-05, 3.355874121926401e-05, 3.440828127125858e-05, 3.527932735124645e-05, 3.61724238776478e-05, 3.708812905042552e-05, 3.802701519994201e-05, 3.898966914464659e-05, 3.997669255781605e-05, 4.098870234357818e-05, 4.202633102245293e-05, 4.309022712665168e-05, 4.418105560538254e-05, 4.529949824041383e-05, 4.644625407215601e-05, 4.762203983652843e-05, 4.882759041288251e-05, 5.006365928326305e-05, 5.133101900329322e-05, 5.263046168497763e-05, 5.396279949172579e-05, 
5.532886514590378e-05, 5.672951244923279e-05, 5.81656168163584e-05, 5.963807582192437e-05, 6.114780976149316e-05, 6.269576222666154e-05, 6.428290069473373e-05, 6.591021713331729e-05, 6.757872862022115e-05, 6.928947797904311e-05, 7.104353443084144e-05, 7.284199426230126e-05, 7.468598151080973e-05, 7.657664866686953e-05, 7.851517739428965e-05, 8.050277926860109e-05, 8.254069653416135e-05, 8.463020288041876e-05, 8.677260423782164e-05, 8.896923959387029e-05, 9.122148182981925e-05, 9.353073857855466e-05, 9.589845310418126e-05, 9.83261052038676e-05, 0.0001008152121325144, 0.0001033673295508207, 0.0001059840524973423, 0.000108667016385148, 0.000111417898023696, 0.000114238416666569, 0.0001171303350857202, 0.0001200954606729039, 0.0001231356465689751, 0.000126252792821765, 0.0001294488475732551, 0.0001327258082767882, 0.0001360857229450805, 0.0001395306914298094, 0.0001430628667335784, 0.0001466844563550776, 0.0001503977236682757, 0.0001542049893365099, 0.0001581086327623518, 0.0001621110935741544, 0.0001662148731502118, 0.0001704225361814738, 0.0001747367122738013, 0.0001791600975907532, 0.0001836954565379338, 0.0001883456234899517, 0.0001931135045610637, 0.0001980020794206125, 0.0002030144031543887, 0.0002081536081730759, 0.0002134229061689741, 0.000218825590122214, 0.0002243650363577216, 0.0002300447066542095, 0.0002358681504065119, 0.0002418390068426134, 0.0002479610072967476, 0.0002542379775399898, 0.0002606738401697927, 0.0002672726170599523, 0.0002740384318725378, 0.0002809755126333404, 0.0002880881943724574, 0.0002953809218316478, 0.0003028582522401501, 0.0003105248581606951, 0.000318385530407472, 0.0003264451810378909, 0.0003347088464199756, 0.0003431816903773191, 0.0003518690074135534, 0.0003607762260183301, 0.0003699089120568958, 0.0003792727722453409, 0.0003888736577137101, 0.0003987175676591835, 0.0004088106530915889, 0.0004191592206736091, 0.0004297697366580296, 0.0004406488309245152, 0.0004518033011184056, 0.0004632401168941004, 0.000474966424265699, 
0.0004869895500675624, 0.0004993170065276025, 0.0005119564959561298, 0.0005249159155531585, 0.0005382033623371963, 0.0005518271381985322, 0.0005657957550802002, 0.0005801179402898232, 0.0005948026419456216, 0.0006098590345600081, 0.0006252965247641847, 0.0006411247571773352, 0.0006573536204240464, 0.0006739932533036649, 0.0006910540511154736, 0.0007085466721435454, 0.0007264820443053482, 0.0007448713719682052, 0.0007637261429378117, 0.0007830581356232032, 0.0008028794263825353, 0.0008232023970542922, 0.0008440397426785607, 0.000865404479413133, 0.0008873099526493954, 0.0009097698453329584, 0.0009327981864942268, 0.0009564093599941767, 0.000980618113490713, 0.001005439567631228, 0.001030889225476952, 0.001056982982164992, 0.001083737134813999, 0.001111168392679565, 0.00113929388756567, 0.001168131184498547, 0.001197698292669595, 0.001228013676654076, 0.001259096267912488, 0.001290965476581774, 0.001323641203563541, 0.001357143852916803, 0.001391494344562876, 0.001426714127310169, 0.001462825192207024, 0.001499850086230661, 0.001537811926320766, 0.001576734413766289, 0.001616641848954272, 0.00165755914648985, 0.001699511850696575, 0.001742526151506684, 0.001786628900751017, 0.001831847628858502, 0.001878210561975583, 0.0019257466395159, 0.001974485532151089, 0.002024457660253659, 0.002075694212803161, 0.002128227166767347, 0.002182089306969952, 0.002237314246457401, 0.002293936447376748, 0.002351991242377602, 0.002411514856551091, 0.002472544429919174, 0.002535118040488011, 0.002599274727879398, 0.002665054517554573, 0.002732498445645173, 0.002801648584406296, 0.002872548068307178, 0.002945241120775241, 0.003019773081609651, 0.003096190435081049, 0.003174540838734301, 0.003254873152911748, 0.003337237471014695, 0.00342168515052135, 0.003508268844779923, 0.003597042535595933, 0.003688061566633326, 0.003781382677649432, 0.003877064039584222, 0.003975165290524952, 0.004075747572567575, 0.004178873569597039, 0.004284607546008937, 0.004393015386395542, 
0.004504164636219945, 0.004618124543502302, 0.004734966101543062, 0.004854762092708402, 0.004977587133303773, 0.005103517719562146, 0.005232632274773941, 0.005365011197586532, 0.005500736911501689, 0.005639893915599924, 0.005782568836521637, 0.005928850481735318, 0.006078829894123994, 0.006232600407921788, 0.006390257706032966, 0.006551899878767059, 0.006717627484023735, 0.006887543608962581, 0.007061753933193228, 0.007240366793522145, 0.007423493250293577, 0.007611247155362377, 0.007803745221737842, 0.008001107094938235, 0.008203455426096566, 0.008410915946859282, 0.008623617546120142, 0.008841692348632793, 0.009065275795546241, 0.009294506726908435, 0.009529527466184337, 0.009770483906835457, 0.01001752560100931, 0.01027080585038784, 0.01053048179924511, 0.01079671452976562, 0.01106966915967557, 0.01134951494224054, 0.01163642536868413, 0.01193057827308319, 0.01223215593979644, 0.01254134521348435, 0.01285833761177941, 0.01318332944066696, 0.0135165219126379, 0.01385812126767612, 0.01420833889714401, 0.01456739147063151, 0.01493550106583463, 0.01531289530153087, 0.01569980747372031, 0.01609647669500242, 0.01650314803725935, 0.01692007267771864, 0.0173475080484688, 0.01778571798950248, 0.01823497290536397, 0.0186955499254782, 0.01916773306823992, 0.01965181340894353, 0.02014808925163408, 0.02065686630496288, 0.02117845786213075, 0.02171318498500437, 0.02226137669249176, 0.02282337015326423, 0.02339951088291375, 0.02399015294563509, 0.02459565916052402, 0.02521640131258347, 0.02585276036853032, 0.02650512669749781, 0.02717390029672791, 0.02785949102235032, 0.02856231882534481, 0.02928281399278463, 0.03002141739446025, 0.03077858073498222, 0.03155476681146359, 0.03235044977688264, 0.03316611540922647, 0.03400226138651817, 0.03485939756782846, 0.03573804628037459, 0.03663874261280892, 0.03756203471479851, 0.03850848410299929, 0.03947866597352571, 0.04047316952101763, 0.04149259826440578, 0.04253757037947494, 0.04360871903832549, 0.04470669275583047, 
0.04583215574318526, 0.04698578826864572, 0.04816828702554693, 0.04938036550769605, 0.05062275439222676, 0.05189620193000293, 0.05320147434365547, 0.05453935623333166, 0.05591065099023602, 0.05731618121803466, 0.05875678916219226, 0.06023333714730694, 0.06174670802250043, 0.06329780561491961, 0.0648875551913955, 0.06651690392830109, 0.06818682138964373, 0.06989830001341601, 0.07165235560622593, 0.07345002784621504, 0.07529238079426424, 0.07718050341347757, 0.07911551009692364, 0.08109854120359963, 0.08313076360257599, 0.08521337122526032, 0.08734758562570918, 0.08953465654889917, 0.09177586250685202, 0.09407251136249413, 0.09642594092110796, 0.09883751952921713, 0.1013086466807249, 0.1038407536301013, 0.1064353040123949, 0.1090937944698184, 0.1118177552846282, 0.1146087510179958, 0.1174683811545335, 0.1203982807521092, 0.1234001210965523, 0.1264756103608141, 0.1296264942681132, 0.1328545567585541, 0.1361616206586697, 0.1395495483532902, 0.1430202424590999, 0.1465756464991924, 0.1502177455778813, 0.1539485670549789, 0.1577701812186868, 0.1616847019561943, 0.1656942874210101, 0.1698011406959907, 0.1740075104509606, 0.1783156915937462, 0.1827280259133677, 0.1872469027140586, 0.1918747594386896, 0.1966140822800998, 0.2014674067787321, 0.2064373184048846, 0.2115264531237827, 0.2167374979415721, 0.2220731914302283, 0.227536324229255, 0.2331297395219284, 0.238856333483721, 0.2447190557003962, 0.2507209095531465, 0.256864952567987, 0.2631542967264769, 0.2695921087346904, 0.2761816102471797, 0.2829260780425307, 0.2898288441469127, 0.2968932959018589, 0.304122875972317, 0.3115210822908141, 0.3190914679333846, 0.3268376409226903, 0.3347632639535458, 0.3428720540358442, 0.3511677820496295, 0.3596542722068393, 0.368335401413978, 0.3772150985297275, 0.3862973435112451, 0.3955861664426069, 0.4050856464386004, 0.4147999104167583, 0.4247331317302397, 0.4348895286538679, 0.4452733627153003, 0.4558889368630241, 0.4667405934625106, 0.4778327121115554, 0.4891697072654862, 
0.5007560256625723, 0.5125961435396426, 0.5246945636275455, 0.5370558119157474, 0.5496844341750062, 0.5625849922266881, 0.5757620599469633, 0.5892202189937332, 0.6029640542438036, 0.6169981489274676, 0.6313270794473003, 0.6459554098676638, 0.6608876860610556, 0.6761284294971424, 0.6916821306600118, 0.7075532420788821, 0.7237461709572557, 0.7402652713852532, 0.7571148361196429, 0.7742990879159026, 0.7918221703964797, 0.809688138439327, 0.8279009480706833, 0.8464644458460683, 0.8653823577034787, 0.8846582772728369, 0.9042956536259334, 0.9242977784512821, 0.9446677726386236, 0.9654085722581994, 0.9865229139203597, 1.008013319501696, 1.029882080224526, 1.052131240077375, 1.074762578565061, 1.097777592777988, 1.121177478771559, 1.144963112247939, 1.169135028533963, 1.193693401850751, 1.218638023872463, 1.243968281573863, 1.26968313436863, 1.295781090543047, 1.322260182992513, 1.349117944271434, 1.376351380970554, 1.403956947439355, 1.431930518875316, 1.460267363806089, 1.488962115995351, 1.518008745808226, 1.547400531077493, 1.577130027517708, 1.607189038740564, 1.637568585931428, 1.668258877254132, 1.699249277058553, 1.730528274973501, 1.76208345497585, 1.793901464535722, 1.825967983946884, 1.858267695961331, 1.89078425585726, 1.9235002620804, 1.956397227609779, 1.989455552210625, 2.022654495749041, 2.055972152755446, 2.089385428436483, 2.122870016348007, 2.156400377955038, 2.189949724317904, 2.223490000157311, 2.25699187056459, 2.290424710636794, 2.32375659832965, 2.356954310834264, 2.389983324796093, 2.422807820706622, 2.455390691809398, 2.487693557872414, 2.519676784187932, 2.551299506168658, 2.582519659915449, 2.613294019135981, 2.643578238796199, 2.673326905886014, 2.702493597677889, 2.731030947850852, 2.758890720842978, 2.786023894782124, 2.812380753327073, 2.837910986729125, 2.862563802396913, 2.886288045214449, 2.909032327823854, 2.930745171039089, 2.95137515450526, 2.970871077659015, 2.989182130978866, 3.006258077439671, 3.022049444002417, 3.036507722878924, 
3.04958558221054, 3.061237085690387, 3.071417920540196, 3.080085633125085, 3.087199871353128, 3.092722632861541, 3.096618517838035, 3.098854985165364, 3.099402610409792, 3.098235344001627, 3.095330767779087, 3.090670347887589, 3.084239681846691, 3.076028737418964, 3.066032080741379, 3.054249091013596, 3.040684158882082, 3.02534686551808, 3.00825213926541, 2.989420386635258, 2.968877594354794, 2.946655399139765, 2.922791121863878, 2.897327762845997, 2.870313955075754, 2.841803872355814, 2.811857089560933, 2.780538392506189, 2.747917535285765, 2.714068943394406, 2.679071361481492, 2.643007445216063, 2.605963297463058, 2.568027949787471, 2.529292791213352, 2.489850947165332, 2.449796612606118, 2.409224344544588, 2.368228320313618, 2.326901569287163, 2.285335187001121, 2.243617541935472, 2.201833486473591, 2.160063584740607, 2.118383371091482, 2.076862653920281, 2.035564880137528, 1.994546576048651, 1.953856880393867, 1.913537184903166, 1.873620896800512, 1.83413333617666, 1.795091778957454, 1.756505653242995, 1.718376893005749, 1.680700448444494, 1.643464946640193, 1.606653489511946, 1.570244568411754, 1.534213066042691, 1.498531306789674, 1.46317010611539, 1.428099758549208, 1.393290892200565, 1.358715105947916, 1.324345293857313, 1.290164488458539, 1.256191929871013, 1.222455467143127, 1.18898248692583, 1.155799866246964, 1.122933926133109, 1.090410386212955, 1.058254320435538, 1.026490114035627, 0.995141421876882, 0.9642311283012569, 0.9337813086103229, 0.903813192300909, 0.874347128173562, 0.845402551427861, 0.8169979528535767, 0.7891508502210577, 0.7618777619680037, 0.7351941832730491, 0.709114564599239, 0.6836522927826217, 0.6588196747328007, 0.6346279238033767, 0.6110871488808627, 0.5882063462308113, 0.5659933941296602, 0.5444550503001871, 0.5235969521574899, 0.5034236198611537, 0.4839384621577665, 0.4651437849862306, 0.4470408028064862, 0.429629652600344, 0.4129094104812026, 0.3968781108375536, 0.3815327679234602, 0.366869399797677, 0.3528830545018814, 
0.3395678383576783, 0.3269169462517262, 0.3149226937686215, 0.3035765510221744, 0.2928691780275154, 0.2827904614492284, 0.2733295525545048, 0.2644749061952897, 0.2562143206396536, 0.2485349780702708, 0.2414234855670386, 0.2348659163915717, 0.2288478513936543, 0.2233544203637218, 0.2183703431611028, 0.2138799704550284, 0.2098673239242342, 0.2063161357712411, 0.2032098874189231, 0.2005318472695923, 0.1982651074203067, 0.1963926192422078, 0.194897227746158, 0.1937617046715084, 0.1929687802492412, 0.1925011736047464, 0.1923416217789123, 0.1924729073588396, 0.1928778847211979, 0.1935395049019104, 0.1944408391154256, 0.19556510095526, 0.1968956673147899, 0.1984160980734345, 0.2001101545984706, 0.2019618171167869, 0.2039553010140067, 0.2060750721206452, 0.2083058610463888, 0.2106326766242745, 0.2130408185265522, 0.2155158891134208, 0.2180438045746634, 0.2206108054225518, 0.223203466392261, 0.2258087058034931, 0.2284137944340797, 0.2310063639530508, 0.2335744149570624, 0.2361063246501834, 0.2385908542028993, 0.2410171558218113, 0.2433747795569437, 0.245653679868828, 0.2478442219726697, 0.2499371879719382, 0.2519237827886925, 0.2537956398929199, 0.255544826828127, 0.2571638505254694, 0.258645662393827, 0.2599836631685184, 0.2611717074968051, 0.2622041082340208, 0.2630756404201166, 0.2637815449026727, 0.2643175315690241, 0.2646797821471344, 0.2648649525322424, 0.2648701745941411, 0.2646930574182561, 0.2643316879324878, 0.2637846308710937, 0.2630509280267263, 0.2621300967421283, 0.2610221275939117, 0.2597274812223325, 0.2582470842630049, 0.256582324339072, 0.2547350440754697, 0.2527075341005386, 0.2505025250043853, 0.2481231782279931, 0.2455730758621514, 0.2428562093407492, 0.2399769670188505, 0.2369401206321794, 0.2337508106411682, 0.2304145304695044, 0.2269371096541099, 0.2233246959306548, 0.2195837362859841, 0.2157209570161764, 0.2117433428363023, 0.2076581150952404, 0.2034727091560979, 0.1991947510097997, 0.1948320331962031, 0.1903924901136049, 0.1858841728036701, 
0.1813152233045779, 0.1766938486704877, 0.1720282947602208, 0.1673268199022848, 0.16259766854698, 0.1578490450192766, 0.1530890874883962, 0.1483258422715237, 0.1435672385897908, 0.1388210638945688, 0.1340949398811742, 0.1293962992954875, 0.1247323636965975, 0.120110122180825, 0.1155363112783405, 0.111017396067234, 0.1065595526095596, 0.1021686517957892, 0.09785024467642926, 0.09360954935114207, 0.0894514394766271, 0.08538043444482694, 0.08140069127278508, 0.07751599823479449, 0.07372977025640694, 0.07004504607853075, 0.06646448718831732, 0.06299037850193537, 0.05962463077275778, 0.05636878468705468, 0.05322401659810378, 0.05019114583880844, 0.0472706435425637, 0.04446264289233662, 0.04176695070882789, 0.03918306028025692, 0.03671016532884477, 0.03434717500253043, 0.03209272977504159, 0.02994521813212726, 0.02790279390722152, 0.02596339422853403, 0.02412475769982727, 0.02238444305464973, 0.02073984785281875, 0.01918822722955348, 0.01772671255120143, 0.01635232986102045, 0.01506201800390112, 0.01385264632522643, 0.01272103184617615, 0.01166395582559878, 0.0106781796269899, 0.009760459818022793, 0.008907562439362788, 0.008116276389044138, 0.007383425878381979, 0.006705881925116612, 0.006080572859131109, 0.005504493825538579, 0.004974715279101023, 0.004488390472723776, 0.004042761951083029, 0.003635167068213193, 0.003263042555041412, 0.0029239281693543, 0.002615469466475008, 0.002335419733986227, 0.002081641138138054, 0.001852105133121021, 0.001644892187167476, 0.001458190881482544, 0.001290296439322276, 0.001139608743162987, 0.001004629897881799, 0.0008839613972396543, 0.0007763009497760447, 0.0006804390185447591, 0.0005952551270006013, 0.0005197139808486704, 0.0004528614528515863, 0.0003938204745168576, 0.0003417868753161489, 0.0002960252066780776, 0.0002558645845011242, 0.0002206945804045246, 0.0001899611884198588, 0.000163162890367226, 0.0001398468397954503, 0.0001196051811287888, 0.0001020715175812028, 8.691753849649298e-05, 7.384981406671267e-05, 
6.260676288573516e-05, 5.295579551874316e-05, 4.469063521662639e-05, 3.762881507795547e-05, 3.160934935807122e-05, 2.64905752396227e-05, 2.214816020376409e-05, 1.847326916617822e-05, 1.537088475538896e-05, 1.275827349935894e-05, 1.056359023607343e-05, 8.724612759988928e-06, 7.187598543871809e-06, 5.906255319643779e-06, 4.840817347589258e-06, 3.957219336311271e-06, 3.226360182427121e-06, 2.623448966122004e-06, 2.127425954811946e-06, 1.720451719307331e-06, 1.387457841605586e-06, 1.115753420631462e-06, 8.946779098913105e-07, 7.153210346995715e-07, 5.701953759103423e-07, 4.531537508310374e-07, 3.590864137121873e-07, 2.837143828199412e-07, 2.234995888751295e-07, 1.755370970644849e-07, 1.374489764620533e-07, 1.072946146030451e-07, 8.349516861793112e-08, 6.477012820825952e-08, 5.008422091602929e-08, 3.860311878495659e-08, 2.965660951163429e-08, 2.270807643349015e-08, 1.732929206464307e-08, 1.317967118633108e-08, 9.989253475335136e-09, 7.544793982217486e-09, 5.678434032425633e-09, 4.258506802186743e-09, 3.182102301463014e-09, 2.369077058580035e-09, 1.757245622318088e-09, 1.298535151710074e-09, 9.559218262308827e-10, 7.009994582647704e-10, 5.120573132012362e-10, 3.725664417805862e-10, 2.699924257366418e-10, 1.948678785475291e-10, 1.400708073619544e-10, 1.002654471695921e-10, 7.147078537244228e-11, 5.072901530140195e-11, 3.585185748793587e-11, 2.522729475571586e-11, 1.76729738841772e-11, 1.232549490391229e-11, 8.557174968975079e-12, 5.913750567667294e-12, 4.067963582553173e-12, 2.785140925161687e-12, 1.897787755576625e-12, 1.286922547851013e-12, 8.684305918209558e-13, 5.831349182610463e-13, 3.896073776149791e-13, 2.589894686342868e-13, 1.712797881989554e-13, 1.126863029653473e-13, 7.374773882214534e-14, 4.800760641905809e-14, 3.108321949268724e-14, 2.001552507415335e-14, 1.281749313691484e-14, 8.162138700883661e-15, 5.168194473274167e-15, 3.253692369176771e-15, 2.036503404869078e-15, 1.267162868863849e-15, 7.837639061826319e-16, 4.8184734520336e-16, 2.944214872020266e-16, 
1.787835749663957e-16, 1.07880424385752e-16, 6.467966808318893e-17, 3.852536058211887e-17, 2.279327704570281e-17, 1.339204007834672e-17, 7.81114959075761e-18, 4.520324335077871e-18, 2.593019533536132e-18, 1.472045985082031e-18, 8.246208311627836e-19, 4.534370947953487e-19, 2.422260923251612e-19, 1.23816875476027e-19, 5.645257945254233e-20, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ] + } +} \ No newline at end of file diff --git a/verification/test15/sirius.json b/verification/test15/sirius.json new file mode 100644 index 000000000..293594863 --- /dev/null +++ b/verification/test15/sirius.json @@ -0,0 +1,77 @@ +{ + "control" : { + "!cyclic_block_size" : 2, + "processing_unit" : "cpu", + "std_evp_solver_type" : "lapack", + "gen_evp_solver_type" : "lapack", + "print_forces" : true, + "print_stress" : true + }, + + "parameters" : { + "electronic_structure_method" : "pseudopotential", + + "!num_fv_states" : 40, + + "xc_functionals" : ["XC_LDA_X", "XC_LDA_C_PZ"], + + "smearing_width" : 0.025, + + "use_symmetry" : true, + + "gamma_point" : true, + + "num_mag_dims" : 0, + + "gk_cutoff" : 6.0, + "pw_cutoff" : 20.00, + + "energy_tol" : 1e-8, + "potential_tol" : 1e-8, + + "num_dft_iter" : 100, + + "ngridk" : [1,1,1] + }, + + "iterative_solver" : { + "energy_tolerance" : 1e-2, + "residual_tolerance" : 1e-4, + "num_steps" : 20, + "subspace_size" : 4, + "type" : "davidson", + "converge_by_energy" : 1 + }, + + + "unit_cell" : { + + "lattice_vectors" : [ [0, 3.80402, 3.80402], + [3.80402, 0, 3.80402], + [3.80402, 3.80402, 0] + ], + + "atom_types" : ["Li", "F"], + + "atom_files" : { + "Li" : "Li.pz-s-kjpaw_psl.0.2.1.UPF.json", + "F" : "F.pz-n-kjpaw_psl.0.1.UPF.json" + }, + + "atoms" : { + "F" : [ + [0.51, 0.52, 0.53] + ], + "Li" : [ + [0, 0, 0] + ] + } + }, + + 
"mixer" : { + "beta" : 0.7, + "type" : "broyden1", + "max_history" : 8 + } + +} diff --git a/verification/test2/sirius.json b/verification/test2/sirius.json index 470290549..862ad529b 100644 --- a/verification/test2/sirius.json +++ b/verification/test2/sirius.json @@ -3,8 +3,9 @@ "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", "gen_evp_solver_type" : "lapack", - "verbosity" : 1 - + "verbosity" : 1, + "print_forces" : true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test3/output_ref.json b/verification/test3/output_ref.json index c657b4226..5165d5977 100644 --- a/verification/test3/output_ref.json +++ b/verification/test3/output_ref.json @@ -4,10 +4,10 @@ "band_evp_work_count": 2744.603259259266, "local_operator_num_applied": 2539 }, - "git_hash": "2e3b15c5ba596aa7175f83b0cfdbc56168d6e822", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, - "band_gap": 0.340965927447713, + "band_gap": 0.3409659274951225, "chemical_formula": "LiF", "converged": true, "core_leakage": 0.0, @@ -16,657 +16,659 @@ "bxc": 0.0, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -4.416037977444482, - "ewald": -20.484301851573214, - "exc": -6.991320870244306, - "kin": 16.899908326227326, - "total": -36.6713423793472, - "veff": -21.315946303671808, - "vha": 17.508001915645522, - "vxc": -8.456795799731045 + "eval_sum": -4.416037977790394, + "ewald": -20.484301851573207, + "exc": -6.991320870170582, + "kin": 16.89990832611879, + "total": -36.67134237935083, + "veff": -21.315946303909183, + "vha": 17.50800191491288, + "vxc": -8.456795799630688 }, "fft_coarse_grid": [24,24,24], "fft_grid": [40,40,40], + "forces": [ + [-2.1890667979239621e-16,-2.1310708261563028e-16,-1.6953360768424212e-16], + [8.554446354491911e-15,8.468397124249706e-15,8.675497146268064e-15] + ], "mpi_grid": [1,1], "num_atoms": 2, "num_bands": 15, "num_fv_states": -1, "num_scf_iterations": 9, "omega": 110.0926613870496, - "pw_cutoff": 20.0 + 
"pw_cutoff": 20.0, + "stress": [ + [0.0005428392605976683,1.0030885127016853e-35,-2.117582368135751e-22], + [1.0030885127016853e-35,0.0005428392605976646,7.058607893785911e-23], + [-2.117582368135751e-22,7.058607893785911e-23,0.0005428392605975642] + ] }, "task": 0, "threads_per_rank": 8, "timers": { "Eigensolver_lapack|zheevr": { - "avg": 0.0002977683615819209, + "avg": 0.0003111694915254237, "count": 177, - "max": 0.000625, - "min": 0.00011, - "total": 0.052704999999999995 + "max": 0.000698, + "min": 0.000109, + "total": 0.055076999999999994 }, "Eigensolver_lapack|zhegvx": { - "avg": 0.00015334090909090907, + "avg": 0.00015968181818181825, "count": 88, - "max": 0.000354, - "min": 0.00013, - "total": 0.013493999999999999 + "max": 0.000355, + "min": 0.000129, + "total": 0.014052000000000005 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0067405, + "avg": 0.0084125, "count": 2, - "max": 0.008333, - "min": 0.005148, - "total": 0.013481 + "max": 0.011725, + "min": 0.0051, + "total": 0.016825 }, "sddk::FFT3D::prepare": { - "avg": 6.07263157894737e-05, + "avg": 6.342105263157901e-05, "count": 190, - "max": 0.000125, - "min": 4.5e-05, - "total": 0.011538000000000003 + "max": 0.000124, + "min": 4e-05, + "total": 0.012050000000000012 }, "sddk::FFT3D::prepare|cpu": { - "avg": 5.610526315789474e-05, + "avg": 5.900526315789475e-05, "count": 190, - "max": 0.000122, - "min": 4.2e-05, - "total": 0.010660000000000001 + "max": 0.000108, + "min": 3.7e-05, + "total": 0.011211000000000002 }, "sddk::FFT3D::transform": { - "avg": 0.00044599406581550285, + "avg": 0.00016691350476533026, "count": 5561, - "max": 0.002556, - "min": 0.000137, - "total": 2.4801730000000113 + "max": 0.000901, + "min": 0.000112, + "total": 0.9282060000000016 }, "sddk::FFT3D::transform_xy": { - "avg": 7.678061499730309e-05, + "avg": 8.078475094407522e-05, "count": 5561, - "max": 0.000559, - "min": 4.9e-05, - "total": 0.4269770000000025 + "max": 0.000554, + "min": 5.4e-05, + "total": 0.4492440000000023 }, 
"sddk::FFT3D::transform_z": { - "avg": 0.00036266283042618103, + "avg": 8.007139003776334e-05, "count": 5561, - "max": 0.002201, - "min": 6.9e-05, - "total": 2.016767999999993 + "max": 0.000338, + "min": 4.7e-05, + "total": 0.445277000000002 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0003595583528142414, + "avg": 7.753947131810887e-05, "count": 5561, - "max": 0.002196, - "min": 6.6e-05, - "total": 1.9995039999999964 + "max": 0.000335, + "min": 4.6e-05, + "total": 0.43119700000000344 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0003561702931127496, + "avg": 7.464448840136706e-05, "count": 5561, - "max": 0.002192, - "min": 6.5e-05, - "total": 1.9806630000000005 + "max": 0.00033, + "min": 4.3e-05, + "total": 0.41509800000000224 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.00018840000000000003, + "avg": 0.00021710000000000002, "count": 20, - "max": 0.000694, + "max": 0.000782, "min": 6e-05, - "total": 0.0037680000000000005 + "total": 0.004342 }, "sddk::Gvec::init": { - "avg": 0.0006617999999999999, + "avg": 0.0006257000000000001, "count": 10, - "max": 0.003501, - "min": 0.000214, - "total": 0.006618 + "max": 0.003186, + "min": 0.000186, + "total": 0.006257 }, "sddk::inner": { - "avg": 2.8975954738330993e-05, + "avg": 3.1465346534653455e-05, "count": 707, - "max": 7.7e-05, + "max": 0.000122, "min": 3e-06, - "total": 0.02048600000000001 + "total": 0.02224599999999999 }, "sddk::inner|local": { - "avg": 2.6223479490806156e-05, + "avg": 2.862517680339461e-05, "count": 707, - "max": 7.3e-05, + "max": 0.000119, "min": 1e-06, - "total": 0.018539999999999952 + "total": 0.02023799999999999 }, "sddk::matrix_storage::matrix_storage": { - "avg": 8.153846153846207e-07, + "avg": 1.0346153846153937e-06, "count": 520, - "max": 3e-06, + "max": 4e-06, "min": 0.0, - "total": 0.00042400000000000277 + "total": 0.0005380000000000047 }, "sddk::matrix_storage::remap_backward": { - "avg": 1.052830188679245e-06, + "avg": 7.471698113207532e-07, "count": 265, - "max": 9e-06, + 
"max": 7e-06, "min": 0.0, - "total": 0.0002789999999999999 + "total": 0.00019799999999999958 }, "sddk::matrix_storage::remap_forward": { - "avg": 3.973913043478265e-06, + "avg": 3.8057971014492803e-06, "count": 345, - "max": 2.3e-05, - "min": 3e-06, - "total": 0.0013710000000000013 + "max": 1.7e-05, + "min": 2e-06, + "total": 0.0013130000000000017 }, "sddk::matrix_storage::set_num_extra": { - "avg": 1.1213114754098483e-06, + "avg": 8.754098360655819e-07, "count": 610, - "max": 2e-05, + "max": 2e-06, "min": 0.0, - "total": 0.0006840000000000075 + "total": 0.000534000000000005 }, "sddk::orthogonalize": { - "avg": 0.00013737853107344643, + "avg": 0.00014983615819209035, "count": 177, - "max": 0.000288, - "min": 4.7e-05, - "total": 0.024316000000000018 + "max": 0.000337, + "min": 5.1e-05, + "total": 0.026520999999999992 }, "sddk::orthogonalize|tmtrx": { - "avg": 6.830508474576286e-06, + "avg": 8.096045197740114e-06, "count": 177, - "max": 4.7e-05, + "max": 7.8e-05, "min": 1e-06, - "total": 0.0012090000000000026 + "total": 0.0014330000000000002 }, "sddk::orthogonalize|transform": { - "avg": 1.9180790960451957e-05, + "avg": 2.0576271186440682e-05, "count": 177, - "max": 9.8e-05, + "max": 0.000127, "min": 3e-06, - "total": 0.0033949999999999965 + "total": 0.0036420000000000007 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.003041, + "avg": 0.002173, "count": 1, - "max": 0.003041, - "min": 0.003041, - "total": 0.003041 + "max": 0.002173, + "min": 0.002173, + "total": 0.002173 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.0008264499999999997, + "avg": 0.00013654999999999999, "count": 20, - "max": 0.000982, - "min": 0.000804, - "total": 0.016528999999999995 + "max": 0.000202, + "min": 0.00012, + "total": 0.002731 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.00082685, + "avg": 0.00014439999999999999, "count": 20, - "max": 0.000909, - "min": 0.000816, - "total": 0.016537 + "max": 0.000226, + "min": 0.000129, + "total": 0.0028879999999999995 }, 
"sddk::transform": { - "avg": 4.6980434782608714e-05, + "avg": 4.983478260869562e-05, "count": 460, - "max": 0.00014, + "max": 0.000136, "min": 1.9e-05, - "total": 0.02161100000000001 + "total": 0.022923999999999986 }, "sddk::transform|init": { - "avg": 5.6630434782608635e-06, + "avg": 5.7760869565217335e-06, "count": 460, - "max": 0.000103, + "max": 2.5e-05, "min": 0.0, - "total": 0.002604999999999997 + "total": 0.0026569999999999975 }, "sddk::transform|local": { - "avg": 1.636679920477145e-05, + "avg": 1.754572564612335e-05, "count": 1006, - "max": 8.7e-05, + "max": 0.000111, "min": 5e-06, - "total": 0.016465000000000077 + "total": 0.01765100000000009 }, "sirius::Atom_type::init": { - "avg": 0.019108, + "avg": 0.019545, "count": 2, - "max": 0.020413, - "min": 0.017803, - "total": 0.038216 + "max": 0.020885, + "min": 0.018205, + "total": 0.03909 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.0057035, + "avg": 0.0058980000000000005, "count": 2, - "max": 0.005821, - "min": 0.005586, - "total": 0.011407 + "max": 0.005992, + "min": 0.005804, + "total": 0.011796000000000001 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { - "avg": 0.003907333333333334, + "avg": 0.004150166666666667, "count": 6, - "max": 0.005454, - "min": 0.003194, - "total": 0.023444000000000003 + "max": 0.004703, + "min": 0.003532, + "total": 0.024901 }, "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { - "avg": 0.0038925, + "avg": 0.004136333333333333, "count": 6, - "max": 0.00543, - "min": 0.003185, - "total": 0.023355 + "max": 0.004689, + "min": 0.003519, + "total": 0.024818 }, "sirius::Augmentation_operator_gvec_deriv|constructor": { - "avg": 0.006785, + "avg": 0.007703, "count": 1, - "max": 0.006785, - "min": 0.006785, - "total": 0.006785 + "max": 0.007703, + "min": 0.007703, + "total": 0.007703 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.03305181249999999, + "avg": 0.017639299999999993, "count": 80, - "max": 
0.074075, - "min": 0.015736, - "total": 2.6441449999999995 + "max": 0.04022, + "min": 0.008098, + "total": 1.4111439999999995 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 1.9875000000000002e-05, + "avg": 1.9675000000000007e-05, "count": 80, - "max": 5.3e-05, - "min": 1.7e-05, - "total": 0.00159 + "max": 3.8e-05, + "min": 1.6e-05, + "total": 0.0015740000000000005 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.0002547354085603114, + "avg": 0.00026671595330739277, "count": 257, - "max": 0.000628, + "max": 0.0007, "min": 0.000112, - "total": 0.06546700000000004 + "total": 0.06854599999999994 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.032511675, + "avg": 0.01721595, "count": 80, - "max": 0.073541, - "min": 0.01522, - "total": 2.6009339999999996 + "max": 0.039822, + "min": 0.007608, + "total": 1.3772760000000002 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 4.126506024096387e-05, + "avg": 4.520481927710845e-05, "count": 83, - "max": 0.00017, - "min": 2.5e-05, - "total": 0.003425000000000001 + "max": 0.000189, + "min": 2.6e-05, + "total": 0.0037520000000000014 }, "sirius::Band::initialize_subspace": { - "avg": 0.452813, + "avg": 0.058949, "count": 1, - "max": 0.452813, - "min": 0.452813, - "total": 0.452813 + "max": 0.058949, + "min": 0.058949, + "total": 0.058949 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.056451375000000005, + "avg": 0.007303500000000001, "count": 8, - "max": 0.059119, - "min": 0.050319, - "total": 0.45161100000000004 + "max": 0.007524, + "min": 0.006387, + "total": 0.05842800000000001 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.041864624999999996, + "avg": 0.0003165, "count": 8, - "max": 0.04396, - "min": 0.036939, - "total": 0.33491699999999996 + "max": 0.000328, + "min": 0.000288, + "total": 0.002532 }, "sirius::Band::residuals": { - "avg": 0.0002503229571984438, + "avg": 0.000266649805447471, "count": 257, - "max": 0.000484, + 
"max": 0.000511, "min": 0.0, - "total": 0.06433300000000006 + "total": 0.06852900000000005 }, "sirius::Band::residuals_aux": { - "avg": 0.00028654497354497355, + "avg": 0.0003069206349206349, "count": 189, - "max": 0.000434, - "min": 0.000241, - "total": 0.054157 + "max": 0.000391, + "min": 0.000246, + "total": 0.058008 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.00014703116147308766, + "avg": 0.00015696883852691214, "count": 353, - "max": 0.000273, + "max": 0.000332, "min": 3.4e-05, - "total": 0.05190199999999995 + "total": 0.05540999999999998 }, "sirius::Band::solve": { - "avg": 0.2660707, + "avg": 0.1421504, "count": 10, - "max": 0.522306, - "min": 0.131996, - "total": 2.660707 + "max": 0.278409, + "min": 0.066818, + "total": 1.421504 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000301125, + "avg": 0.000274875, "count": 8, - "max": 0.000467, - "min": 0.000198, - "total": 0.002409 + "max": 0.000415, + "min": 0.000178, + "total": 0.002199 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00022812500000000003, + "avg": 0.000216125, "count": 8, - "max": 0.000348, - "min": 0.000154, - "total": 0.0018250000000000002 + "max": 0.000331, + "min": 0.000133, + "total": 0.001729 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 2.8499999999999965e-07, + "avg": 3.249999999999996e-07, "count": 200, "max": 1e-06, "min": 0.0, - "total": 5.6999999999999935e-05 + "total": 6.499999999999991e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 4.57403846153846e-05, + "avg": 4.3519230769230766e-05, "count": 104, - "max": 0.000103, - "min": 3.8e-05, - "total": 0.004756999999999998 + "max": 7.6e-05, + "min": 3.5e-05, + "total": 0.004526 }, "sirius::Beta_projectors_base::inner": { - "avg": 2.417724288840259e-05, + "avg": 2.6205689277899318e-05, "count": 457, - "max": 9.5e-05, - "min": 9e-06, - "total": 0.011048999999999984 + "max": 8.7e-05, + "min": 1e-05, + "total": 0.011975999999999988 }, "sirius::Beta_projectors_base::local_inner_aux": 
{ - "avg": 2.1501094091903803e-05, + "avg": 2.3091903719912506e-05, "count": 457, - "max": 8.7e-05, + "max": 8.1e-05, "min": 7e-06, - "total": 0.009826000000000038 + "total": 0.010553000000000015 }, "sirius::Beta_projectors_base::prepare": { - "avg": 1.2500000000000003e-06, + "avg": 1.3750000000000004e-06, "count": 16, "max": 2e-06, "min": 1e-06, - "total": 2.0000000000000005e-05 + "total": 2.2000000000000006e-05 }, "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { - "avg": 0.0007442500000000001, + "avg": 0.000708, "count": 8, - "max": 0.001471, - "min": 0.000563, - "total": 0.005954000000000001 + "max": 0.000791, + "min": 0.000637, + "total": 0.005664 }, "sirius::Broyden1::mix": { - "avg": 0.0009335, + "avg": 0.0009601999999999999, "count": 10, - "max": 0.002494, - "min": 6.5e-05, - "total": 0.009335 + "max": 0.002605, + "min": 6.2e-05, + "total": 0.009602 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.029094, - "count": 1, - "max": 0.029094, - "min": 0.029094, - "total": 0.029094 - }, - "sirius::DFT_ground_state::forces": { - "avg": 6.8e-05, + "avg": 0.000157, "count": 1, - "max": 6.8e-05, - "min": 6.8e-05, - "total": 6.8e-05 + "max": 0.000157, + "min": 0.000157, + "total": 0.000157 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 4.34304, + "avg": 1.797353, "count": 1, - "max": 4.34304, - "min": 4.34304, - "total": 4.34304 + "max": 1.797353, + "min": 1.797353, + "total": 1.797353 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 0.43374880000000005, + "avg": 0.17917259999999996, "count": 10, - "max": 0.689935, - "min": 0.30170400000000003, - "total": 4.3374880000000005 + "max": 0.315713, + "min": 0.102156, + "total": 1.7917259999999997 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.00014496249999999993, + "avg": 0.000155825, "count": 80, - "max": 0.000203, + "max": 0.000248, "min": 0.000128, - "total": 0.011596999999999994 + "total": 0.012466 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 
0.0025574000000000005, + "avg": 0.0012808124999999998, "count": 80, - "max": 0.002744, - "min": 0.002297, - "total": 0.20459200000000002 + "max": 0.001422, + "min": 0.001147, + "total": 0.10246499999999999 }, "sirius::Density::augment": { - "avg": 0.07280829999999999, + "avg": 0.0013455000000000001, "count": 10, - "max": 0.074331, - "min": 0.070378, - "total": 0.7280829999999999 + "max": 0.002553, + "min": 0.001095, + "total": 0.013455000000000002 }, "sirius::Density::generate": { - "avg": 0.0955697, + "avg": 0.013255499999999998, "count": 10, - "max": 0.097459, - "min": 0.09312, - "total": 0.9556969999999999 + "max": 0.014391, + "min": 0.012899, + "total": 0.13255499999999998 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.031609, + "avg": 0.000581, "count": 1, - "max": 0.031609, - "min": 0.031609, - "total": 0.031609 + "max": 0.000581, + "min": 0.000581, + "total": 0.000581 }, "sirius::Density::generate_rho_aug": { - "avg": 0.07273739999999998, + "avg": 0.0012751, "count": 10, - "max": 0.074249, - "min": 0.070303, - "total": 0.7273739999999999 + "max": 0.002477, + "min": 0.001035, + "total": 0.012750999999999998 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.00026485, + "avg": 0.00018800000000000002, "count": 20, - "max": 0.00156, - "min": 0.000108, - "total": 0.0052970000000000005 + "max": 0.001457, + "min": 9.8e-05, + "total": 0.0037600000000000003 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.00031690000000000006, + "avg": 0.0002851, "count": 20, - "max": 0.000361, - "min": 0.000297, - "total": 0.006338000000000001 + "max": 0.000425, + "min": 0.00026, + "total": 0.005702 }, "sirius::Density::generate_valence": { - "avg": 0.0955659, + "avg": 0.013251000000000002, "count": 10, - "max": 0.097456, - "min": 0.093116, - "total": 0.9556589999999999 + "max": 0.014382, + "min": 0.012894, + "total": 0.13251000000000002 }, "sirius::Density::initial_density": { - "avg": 0.03434, + "avg": 0.002077, "count": 1, - "max": 0.03434, 
- "min": 0.03434, - "total": 0.03434 + "max": 0.002077, + "min": 0.002077, + "total": 0.002077 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.0003761, + "avg": 0.00043110000000000007, "count": 10, - "max": 0.000397, - "min": 0.000371, - "total": 0.0037609999999999996 + "max": 0.000553, + "min": 0.000376, + "total": 0.004311000000000001 }, "sirius::Density::update": { - "avg": 0.031627, + "avg": 0.000596, "count": 1, - "max": 0.031627, - "min": 0.031627, - "total": 0.031627 + "max": 0.000596, + "min": 0.000596, + "total": 0.000596 }, "sirius::Field4D::symmetrize": { - "avg": 0.0035644999999999995, + "avg": 0.00218085, "count": 20, - "max": 0.004157, - "min": 0.003438, - "total": 0.07128999999999999 + "max": 0.002467, + "min": 0.002047, + "total": 0.043616999999999996 }, "sirius::Force::calc_forces_core": { - "avg": 0.002587, + "avg": 0.001078, "count": 1, - "max": 0.002587, - "min": 0.002587, - "total": 0.002587 + "max": 0.001078, + "min": 0.001078, + "total": 0.001078 }, "sirius::Force::calc_forces_ewald": { - "avg": 0.127343, + "avg": 0.000539, "count": 1, - "max": 0.127343, - "min": 0.127343, - "total": 0.127343 + "max": 0.000539, + "min": 0.000539, + "total": 0.000539 }, "sirius::Force::calc_forces_nonloc": { - "avg": 0.004576, + "avg": 0.005353, "count": 1, - "max": 0.004576, - "min": 0.004576, - "total": 0.004576 + "max": 0.005353, + "min": 0.005353, + "total": 0.005353 }, "sirius::Force::calc_forces_scf_corr": { - "avg": 0.000422, + "avg": 0.000385, "count": 1, - "max": 0.000422, - "min": 0.000422, - "total": 0.000422 + "max": 0.000385, + "min": 0.000385, + "total": 0.000385 }, "sirius::Force::calc_forces_us": { - "avg": 0.187578, + "avg": 0.00382, "count": 1, - "max": 0.187578, - "min": 0.187578, - "total": 0.187578 + "max": 0.00382, + "min": 0.00382, + "total": 0.00382 }, "sirius::Force::calc_forces_vloc": { - "avg": 0.000451, + "avg": 0.00058, "count": 1, - "max": 0.000451, - "min": 0.000451, - "total": 0.000451 + "max": 0.00058, + "min": 
0.00058, + "total": 0.00058 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.009433554716981131, + "avg": 0.00453701886792453, "count": 265, - "max": 0.015638, - "min": 0.001012, - "total": 2.4998919999999996 + "max": 0.007752, + "min": 0.000563, + "total": 1.2023100000000004 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.000252825, + "avg": 0.00018992499999999994, "count": 80, - "max": 0.000317, - "min": 0.000225, - "total": 0.020225999999999997 + "max": 0.0003, + "min": 0.000161, + "total": 0.015193999999999996 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.0002455750000000001, + "avg": 0.00018598750000000004, "count": 80, - "max": 0.000303, - "min": 0.00022, - "total": 0.019646000000000007 + "max": 0.000237, + "min": 0.00015, + "total": 0.014879000000000003 }, "sirius::K_point::K_point": { - "avg": 1.25e-06, + "avg": 7.499999999999999e-07, "count": 8, - "max": 3e-06, - "min": 1e-06, - "total": 1e-05 + "max": 4e-06, + "min": 0.0, + "total": 5.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.0003455, + "avg": 0.000295375, "count": 8, - "max": 0.00049, - "min": 0.000262, - "total": 0.002764 + "max": 0.000582, + "min": 0.000196, + "total": 0.002363 }, "sirius::K_point::initialize": { - "avg": 0.0008912499999999999, + "avg": 0.00076225, "count": 8, - "max": 0.001278, - "min": 0.000635, - "total": 0.007129999999999999 + "max": 0.001276, + "min": 0.000471, + "total": 0.006098 }, "sirius::K_point::update": { - "avg": 0.00048975, + "avg": 0.0004505, "count": 8, - "max": 0.000706, - "min": 0.000319, - "total": 0.003918 + "max": 0.000664, + "min": 0.000259, + "total": 0.003604 }, "sirius::K_point_set::add_kpoint": { - "avg": 8.375e-06, + "avg": 3.875000000000001e-06, "count": 8, - "max": 2e-05, + "max": 1.6e-05, "min": 2e-06, - "total": 6.7e-05 + "total": 3.100000000000001e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.035838, + "avg": 0.058027, "count": 1, - "max": 0.035838, - "min": 0.035838, - "total": 0.035838 + "max": 
0.058027, + "min": 0.058027, + "total": 0.058027 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 1.0399999999999999e-05, + "avg": 1.0599999999999997e-05, "count": 10, "max": 1.2e-05, - "min": 8e-06, - "total": 0.00010399999999999998 + "min": 7e-06, + "total": 0.00010599999999999997 }, "sirius::K_point_set::initialize": { - "avg": 0.007153, + "avg": 0.006125, "count": 1, - "max": 0.007153, - "min": 0.007153, - "total": 0.007153 + "max": 0.006125, + "min": 0.006125, + "total": 0.006125 }, "sirius::K_point_set::sync_band_energies": { - "avg": 8.2e-06, + "avg": 4.4e-06, "count": 10, - "max": 1.6e-05, - "min": 6e-06, - "total": 8.2e-05 + "max": 1e-05, + "min": 3e-06, + "total": 4.4e-05 }, "sirius::Local_operator::apply_h": { - "avg": 0.009246898113207539, + "avg": 0.004335196226415094, "count": 265, - "max": 0.015435, - "min": 0.000877, - "total": 2.450427999999998 + "max": 0.007509, + "min": 0.000419, + "total": 1.1488269999999998 }, "sirius::Local_operator::prepare": { - "avg": 0.00011901010101010096, + "avg": 4.775757575757575e-05, "count": 99, - "max": 0.001154, + "max": 0.000458, "min": 1e-05, - "total": 0.011781999999999996 + "total": 0.004727999999999999 }, "sirius::Non_local_operator::Non_local_operator": { "avg": 9.545454545454548e-07, @@ -676,354 +678,354 @@ "total": 2.1000000000000006e-05 }, "sirius::Non_local_operator::apply": { - "avg": 7.028301886792455e-05, + "avg": 7.630943396226413e-05, "count": 530, - "max": 0.000136, - "min": 4.9e-05, - "total": 0.03725000000000001 + "max": 0.000156, + "min": 5.3e-05, + "total": 0.04044399999999999 }, "sirius::Periodic_function::add": { - "avg": 6.35909090909091e-05, + "avg": 6.563636363636364e-05, "count": 22, - "max": 8.8e-05, - "min": 4.9e-05, - "total": 0.001399 + "max": 0.000106, + "min": 4.8e-05, + "total": 0.001444 }, "sirius::Periodic_function::inner": { - "avg": 7.60674157303371e-05, + "avg": 7.252808988764047e-05, "count": 89, - "max": 0.000132, - "min": 5.5e-05, - "total": 0.006770000000000002 + 
"max": 0.000122, + "min": 5.6e-05, + "total": 0.0064550000000000015 }, "sirius::Periodic_function::integrate": { - "avg": 7.172727272727273e-05, + "avg": 6.745454545454546e-05, "count": 11, - "max": 0.000123, - "min": 5.6e-05, - "total": 0.000789 + "max": 9.2e-05, + "min": 5.4e-05, + "total": 0.000742 }, "sirius::Potential::Potential": { - "avg": 0.033802, + "avg": 0.00299, "count": 1, - "max": 0.033802, - "min": 0.033802, - "total": 0.033802 + "max": 0.00299, + "min": 0.00299, + "total": 0.00299 }, "sirius::Potential::generate": { - "avg": 0.05587781818181819, + "avg": 0.015112090909090908, "count": 11, - "max": 0.05916, - "min": 0.049415, - "total": 0.6146560000000001 + "max": 0.017444, + "min": 0.014273, + "total": 0.166233 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.0018698181818181821, + "avg": 0.0014636363636363636, "count": 11, - "max": 0.002016, - "min": 0.001794, - "total": 0.020568000000000003 + "max": 0.003981, + "min": 0.00113, + "total": 0.0161 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 0.009632363636363637, + "avg": 0.01004090909090909, "count": 11, - "max": 0.010738, - "min": 0.009088, - "total": 0.10595600000000001 + "max": 0.010772, + "min": 0.009272, + "total": 0.11044999999999999 }, "sirius::Potential::generate_local_potential": { - "avg": 0.032135, + "avg": 0.001173, "count": 1, - "max": 0.032135, - "min": 0.032135, - "total": 0.032135 + "max": 0.001173, + "min": 0.001173, + "total": 0.001173 }, "sirius::Potential::poisson": { - "avg": 0.03763954545454545, + "avg": 0.000629909090909091, "count": 11, - "max": 0.039471, - "min": 0.031184, - "total": 0.414035 + "max": 0.000826, + "min": 0.000552, + "total": 0.006929000000000001 }, "sirius::Potential::update": { - "avg": 0.03215, + "avg": 0.001188, "count": 1, - "max": 0.03215, - "min": 0.03215, - "total": 0.03215 + "max": 0.001188, + "min": 0.001188, + "total": 0.001188 }, "sirius::Potential::xc": { - "avg": 0.004213000000000001, + "avg": 
0.0023762727272727273, "count": 11, - "max": 0.005054, - "min": 0.004029, - "total": 0.046343 + "max": 0.003115, + "min": 0.002079, + "total": 0.026139 }, "sirius::Potential::xc_mt_nonmagnetic": { - "avg": 0.0005432499999999999, + "avg": 0.0005474545454545455, "count": 44, - "max": 0.000779, - "min": 0.000449, - "total": 0.023902999999999994 + "max": 0.000633, + "min": 0.000438, + "total": 0.024088000000000002 }, "sirius::Potential::xc_rg_nonmagnetic": { - "avg": 0.004208818181818182, + "avg": 0.0023716363636363635, "count": 11, - "max": 0.005046, - "min": 0.004025, - "total": 0.046297 + "max": 0.003107, + "min": 0.002076, + "total": 0.026088 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.0641895, + "avg": 0.06637399999999999, "count": 2, - "max": 0.066491, - "min": 0.061888, - "total": 0.128379 + "max": 0.067479, + "min": 0.065269, + "total": 0.13274799999999998 }, "sirius::Radial_integrals|aug": { - "avg": 0.789334, + "avg": 0.806368, "count": 2, - "max": 0.836656, - "min": 0.742012, - "total": 1.578668 + "max": 0.846673, + "min": 0.766063, + "total": 1.612736 }, "sirius::Radial_integrals|beta": { - "avg": 0.10866300000000001, + "avg": 0.10971249999999999, "count": 2, - "max": 0.113056, - "min": 0.10427, - "total": 0.21732600000000002 + "max": 0.115079, + "min": 0.104346, + "total": 0.21942499999999998 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.0457035, + "avg": 0.042702000000000004, "count": 2, - "max": 0.051318, - "min": 0.040089, - "total": 0.091407 + "max": 0.044433, + "min": 0.040971, + "total": 0.08540400000000001 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.035918, + "avg": 0.035387, "count": 1, - "max": 0.035918, - "min": 0.035918, - "total": 0.035918 + "max": 0.035387, + "min": 0.035387, + "total": 0.035387 }, "sirius::Radial_integrals|vloc": { - "avg": 0.13728400000000002, + "avg": 0.1371185, "count": 2, - "max": 0.148418, - "min": 0.12615, - "total": 0.27456800000000003 + "max": 0.146853, + "min": 0.127384, + 
"total": 0.274237 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.001248, + "avg": 0.001196, "count": 1, - "max": 0.001248, - "min": 0.001248, - "total": 0.001248 + "max": 0.001196, + "min": 0.001196, + "total": 0.001196 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000586, + "avg": 0.00028, "count": 1, - "max": 0.000586, - "min": 0.000586, - "total": 0.000586 + "max": 0.00028, + "min": 0.00028, + "total": 0.00028 }, "sirius::Simulation_context::init_fft": { - "avg": 0.053209, + "avg": 0.02344, "count": 1, - "max": 0.053209, - "min": 0.053209, - "total": 0.053209 + "max": 0.02344, + "min": 0.02344, + "total": 0.02344 }, "sirius::Simulation_context::initialize": { - "avg": 2.524069, + "avg": 2.548313, "count": 1, - "max": 2.524069, - "min": 2.524069, - "total": 2.524069 + "max": 2.548313, + "min": 2.548313, + "total": 2.548313 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.030802, + "avg": 0.0001398333333333333, "count": 6, - "max": 0.038071, - "min": 0.028671, - "total": 0.184812 + "max": 0.000195, + "min": 0.000107, + "total": 0.0008389999999999999 }, "sirius::Simulation_context::update": { - "avg": 0.074207, + "avg": 0.069036, "count": 1, - "max": 0.074207, - "min": 0.074207, - "total": 0.074207 + "max": 0.069036, + "min": 0.069036, + "total": 0.069036 }, "sirius::Simulation_parameters::import": { - "avg": 0.000335, + "avg": 0.000218, "count": 1, - "max": 0.000335, - "min": 0.000335, - "total": 0.000335 + "max": 0.000218, + "min": 0.000218, + "total": 0.000218 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.0019548433734939757, + "avg": 0.0003855301204819277, "count": 83, - "max": 0.002647, - "min": 0.000771, - "total": 0.16225199999999998 + "max": 0.000957, + "min": 0.000171, + "total": 0.031999 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 2.9e-05, + "avg": 1.45e-05, "count": 2, - "max": 3.1e-05, - "min": 2.7e-05, - "total": 5.8e-05 + "max": 1.5e-05, + "min": 1.4e-05, + 
"total": 2.9e-05 }, "sirius::Smooth_periodic_function|inner": { - "avg": 7.230327868852463e-05, + "avg": 6.892622950819677e-05, "count": 122, - "max": 0.000128, - "min": 5.4e-05, - "total": 0.008821000000000004 + "max": 0.00011, + "min": 5.5e-05, + "total": 0.008409000000000007 }, "sirius::Stress|ewald": { - "avg": 0.001195, + "avg": 0.000535, "count": 1, - "max": 0.001195, - "min": 0.001195, - "total": 0.001195 + "max": 0.000535, + "min": 0.000535, + "total": 0.000535 }, "sirius::Stress|har": { - "avg": 0.000539, + "avg": 0.000196, "count": 1, - "max": 0.000539, - "min": 0.000539, - "total": 0.000539 + "max": 0.000196, + "min": 0.000196, + "total": 0.000196 }, "sirius::Stress|kin": { - "avg": 0.00107, + "avg": 0.001002, "count": 1, - "max": 0.00107, - "min": 0.00107, - "total": 0.00107 + "max": 0.001002, + "min": 0.001002, + "total": 0.001002 }, "sirius::Stress|nonloc": { - "avg": 0.016585, + "avg": 0.015191, "count": 1, - "max": 0.016585, - "min": 0.016585, - "total": 0.016585 + "max": 0.015191, + "min": 0.015191, + "total": 0.015191 }, "sirius::Stress|us": { - "avg": 0.114048, + "avg": 0.040975, "count": 1, - "max": 0.114048, - "min": 0.114048, - "total": 0.114048 + "max": 0.040975, + "min": 0.040975, + "total": 0.040975 }, "sirius::Stress|us|gemm": { - "avg": 0.0003545555555555556, + "avg": 0.00030683333333333333, "count": 18, - "max": 0.000694, - "min": 0.000277, - "total": 0.0063820000000000005 + "max": 0.000387, + "min": 0.000281, + "total": 0.005523 }, "sirius::Stress|us|phase_fac": { - "avg": 0.034749, + "avg": 8.649999999999999e-05, "count": 2, - "max": 0.039846, - "min": 0.029652, - "total": 0.069498 + "max": 8.9e-05, + "min": 8.4e-05, + "total": 0.00017299999999999998 }, "sirius::Stress|us|prepare": { - "avg": 0.00015927777777777782, + "avg": 9.555555555555558e-05, "count": 18, - "max": 0.000297, - "min": 8.5e-05, - "total": 0.0028670000000000006 + "max": 0.00014, + "min": 7.6e-05, + "total": 0.0017200000000000004 }, "sirius::Stress|vloc": { - "avg": 
0.068051, + "avg": 0.000559, "count": 1, - "max": 0.068051, - "min": 0.068051, - "total": 0.068051 + "max": 0.000559, + "min": 0.000559, + "total": 0.000559 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.000265, + "avg": 0.000249, "count": 2, - "max": 0.000391, - "min": 0.000139, - "total": 0.00053 + "max": 0.000395, + "min": 0.000103, + "total": 0.000498 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.0292235, + "avg": 0.054527, "count": 2, - "max": 0.030457, - "min": 0.02799, - "total": 0.058447 + "max": 0.055309, + "min": 0.053745, + "total": 0.109054 }, "sirius::Unit_cell::initialize": { - "avg": 0.069107, + "avg": 0.094835, "count": 1, - "max": 0.069107, - "min": 0.069107, - "total": 0.069107 + "max": 0.094835, + "min": 0.094835, + "total": 0.094835 }, "sirius::Unit_cell::update": { - "avg": 0.0295, + "avg": 0.054785, "count": 2, - "max": 0.030862, - "min": 0.028138, - "total": 0.059 + "max": 0.055714, + "min": 0.053856, + "total": 0.10957 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.0291895, + "avg": 0.054472, "count": 2, - "max": 0.03043, - "min": 0.027949, - "total": 0.058379 + "max": 0.055284, + "min": 0.05366, + "total": 0.108944 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.0289935, + "avg": 0.0542445, "count": 2, - "max": 0.030233, - "min": 0.027754, - "total": 0.057987 + "max": 0.055014, + "min": 0.053475, + "total": 0.108489 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 5.1e-05, + "avg": 4.7499999999999996e-05, "count": 2, - "max": 7.6e-05, + "max": 6.9e-05, "min": 2.6e-05, - "total": 0.000102 + "total": 9.499999999999999e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 9.449999999999999e-05, + "avg": 0.0001275, "count": 2, - "max": 0.000114, - "min": 7.5e-05, - "total": 0.00018899999999999999 + "max": 0.00013, + "min": 0.000125, + "total": 0.000255 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 2.8499999999999998e-05, + 
"avg": 4.05e-05, "count": 2, - "max": 3.5e-05, - "min": 2.2e-05, - "total": 5.6999999999999996e-05 + "max": 5.7e-05, + "min": 2.4e-05, + "total": 8.1e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.0035606500000000007, + "avg": 0.00217705, "count": 20, - "max": 0.004152, - "min": 0.003433, - "total": 0.07121300000000001 + "max": 0.002463, + "min": 0.002044, + "total": 0.043540999999999996 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.0018788000000000003, + "avg": 0.0018661000000000003, "count": 20, - "max": 0.002471, - "min": 0.001781, - "total": 0.037576000000000005 + "max": 0.002082, + "min": 0.001761, + "total": 0.03732200000000001 } } } \ No newline at end of file diff --git a/verification/test4/output_ref.json b/verification/test4/output_ref.json index f66be8551..101aa3194 100644 --- a/verification/test4/output_ref.json +++ b/verification/test4/output_ref.json @@ -1,821 +1,1024 @@ { - "build_date": "Thu, 15 Feb 2018 13:44:11", "comm_world_size": 1, - "git_hash": "fd622cf4fe7411690c449c2f12c73d1fd5d5ce8c", + "counters": { + "band_evp_work_count": 335.93125925925926, + "local_operator_num_applied": 314 + }, + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, - "band_gap": 0.276170405210475, + "band_gap": 0.27615812428587005, "chemical_formula": "LiF", + "converged": true, "core_leakage": 0.0, "efermi": 0.254296875, "energy": { "bxc": 0.0, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -3.42682503713329, - "ewald": -20.4843019589124, - "exc": -7.17309490638858, - "kin": 17.8025053643813, - "total": -36.4161146670573, - "veff": -21.2293304015146, - "vha": 19.0793649487082, - "vxc": -8.69121498898831 - }, - "fft_coarse_grid": [24, 24, 24], - "fft_grid": [36, 36, 36], - "mpi_grid": [1, 1], + "eval_sum": -3.4267088452645313, + "ewald": -20.484301851573214, + "exc": -7.173121422103247, + "kin": 17.802719610169564, + "total": -36.41611384304106, + "veff": 
-21.229428455434096, + "vha": 19.079606227837427, + "vxc": -8.691247619129566 + }, + "fft_coarse_grid": [24,24,24], + "fft_grid": [40,40,40], + "forces": [ + [-2.340186425775803e-16,-2.604825941478538e-16,-4.694223837446976e-17], + [9.197057047046641e-15,9.179942961998526e-15,9.176637104855076e-15] + ], + "mpi_grid": [1,1], "num_atoms": 2, "num_bands": 15, "num_fv_states": -1, - "omega": 110.09266138705, - "pw_cutoff": 20.0 + "num_scf_iterations": 10, + "omega": 110.0926613870496, + "pw_cutoff": 20.0, + "stress": [ + [-0.002295222311266679,2.507721281754213e-36,6.548513182776669e-25], + [2.507721281754213e-36,-0.002295222311266679,-1.2063050599781808e-24], + [6.548513182776669e-25,-1.2063050599781808e-24,-0.002295222311266704] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "+global_timer": { - "avg": 3.062485, - "count": 1, - "max": 3.062485, - "min": 3.062485, - "total": 3.062485 - }, - "Eigensolver_lapack::solve_std": { - "avg": 0.0003705, - "count": 20, - "max": 0.000605, - "min": 0.000138, - "total": 0.00741 - }, - "Eigensolver_lapack::solve_std|dsyevr": { - "avg": 0.0003652, - "count": 20, - "max": 0.0006, + "Eigensolver_lapack|dsyevr": { + "avg": 0.0006123809523809522, + "count": 21, + "max": 0.002533, + "min": 0.00019, + "total": 0.012859999999999996 + }, + "Eigensolver_lapack|dsygvx": { + "avg": 0.0009450000000000002, + "count": 12, + "max": 0.009475, "min": 0.000133, - "total": 0.007304 + "total": 0.011340000000000003 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0024195, + "avg": 0.006924, "count": 2, - "max": 0.002844, - "min": 0.001995, - "total": 0.004839 + "max": 0.008964, + "min": 0.004884, + "total": 0.013848 }, "sddk::FFT3D::prepare": { - "avg": 5.03617021276596e-05, + "avg": 6.223404255319148e-05, "count": 47, - "max": 0.000156, - "min": 3e-05, - "total": 0.002367 + "max": 0.000146, + "min": 4e-05, + "total": 0.0029249999999999996 }, "sddk::FFT3D::prepare|cpu": { - "avg": 4.75531914893617e-05, + "avg": 5.734042553191491e-05, "count": 47, - "max": 
0.000148, - "min": 2.8e-05, - "total": 0.002235 + "max": 0.000132, + "min": 3.7e-05, + "total": 0.0026950000000000008 }, "sddk::FFT3D::transform": { - "avg": 0.000377879746835443, - "count": 474, - "max": 0.003159, - "min": 0.000217, - "total": 0.179115 + "avg": 0.00026023217922606937, + "count": 491, + "max": 0.001409, + "min": 0.000112, + "total": 0.12777400000000005 }, "sddk::FFT3D::transform_xy": { - "avg": 0.000178860759493671, - "count": 474, - "max": 0.003022, - "min": 0.000113, - "total": 0.0847799999999999 + "avg": 0.00013553767820773918, + "count": 491, + "max": 0.000887, + "min": 6.1e-05, + "total": 0.06654899999999994 }, "sddk::FFT3D::transform_z": { - "avg": 0.000118913612565445, - "count": 764, - "max": 0.000324, - "min": 7.4e-05, - "total": 0.0908500000000001 + "avg": 7.410838709677422e-05, + "count": 775, + "max": 0.000513, + "min": 3.9e-05, + "total": 0.05743400000000002 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.000116658376963351, - "count": 764, - "max": 0.00032, - "min": 7.2e-05, - "total": 0.089127 + "avg": 7.153548387096769e-05, + "count": 775, + "max": 0.000509, + "min": 3.6e-05, + "total": 0.05543999999999996 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.000114230366492146, - "count": 764, - "max": 0.000313, - "min": 7.1e-05, - "total": 0.0872719999999999 + "avg": 6.866322580645152e-05, + "count": 775, + "max": 0.000504, + "min": 3.5e-05, + "total": 0.05321399999999993 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.000324333333333333, - "count": 3, - "max": 0.00059, - "min": 0.000138, - "total": 0.000973 + "avg": 0.0003675, + "count": 6, + "max": 0.000699, + "min": 0.000112, + "total": 0.002205 }, "sddk::Gvec::init": { - "avg": 0.00104866666666667, + "avg": 0.0011543333333333334, "count": 3, - "max": 0.002213, - "min": 0.000244, - "total": 0.003146 + "max": 0.002627, + "min": 0.000201, + "total": 0.003463 + }, + "sddk::inner": { + "avg": 1.9666666666666666e-05, + "count": 87, + "max": 4.8e-05, + "min": 2e-06, + "total": 
0.001711 }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.00024635, - "count": 20, - "max": 0.000451, - "min": 3.4e-05, - "total": 0.004927 + "sddk::inner|local": { + "avg": 1.685057471264368e-05, + "count": 87, + "max": 4.5e-05, + "min": 1e-06, + "total": 0.001466 }, "sddk::matrix_storage::matrix_storage": { - "avg": 8.59154929577464e-07, + "avg": 1.2112676056338012e-06, "count": 71, - "max": 6e-06, + "max": 5e-06, "min": 0.0, - "total": 6.09999999999999e-05 + "total": 8.599999999999988e-05 }, "sddk::matrix_storage::remap_backward": { - "avg": 1.03125e-06, - "count": 32, - "max": 1.5e-05, + "avg": 9.393939393939397e-07, + "count": 33, + "max": 8e-06, "min": 0.0, - "total": 3.3e-05 + "total": 3.100000000000001e-05 }, "sddk::matrix_storage::remap_forward": { - "avg": 1.41860465116279e-06, - "count": 43, + "avg": 3.4772727272727272e-06, + "count": 44, + "max": 7e-06, + "min": 2e-06, + "total": 0.000153 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 8.051948051948044e-07, + "count": 77, "max": 2e-06, + "min": 0.0, + "total": 6.199999999999993e-05 + }, + "sddk::orthogonalize": { + "avg": 0.00016814285714285718, + "count": 21, + "max": 0.001719, + "min": 4.3e-05, + "total": 0.0035310000000000007 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 3.7142857142857138e-06, + "count": 21, + "max": 2.1e-05, "min": 1e-06, - "total": 6.1e-05 + "total": 7.799999999999999e-05 + }, + "sddk::orthogonalize|transform": { + "avg": 8.414285714285716e-05, + "count": 21, + "max": 0.001595, + "min": 2e-06, + "total": 0.0017670000000000003 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.001587, + "avg": 0.001674, "count": 1, - "max": 0.001587, - "min": 0.001587, - "total": 0.001587 + "max": 0.001674, + "min": 0.001674, + "total": 0.001674 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.000168363636363636, + "avg": 0.00013859090909090906, "count": 22, - "max": 0.000214, - "min": 0.000149, - "total": 0.003704 + "max": 0.000212, + "min": 0.000121, + "total": 
0.0030489999999999996 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.000168, + "avg": 0.00014581818181818183, "count": 22, - "max": 0.00018, - "min": 0.000157, - "total": 0.003696 + "max": 0.000236, + "min": 0.000129, + "total": 0.003208 + }, + "sddk::transform": { + "avg": 3.2508474576271186e-05, + "count": 59, + "max": 8e-05, + "min": 1.5e-05, + "total": 0.0019179999999999998 }, - "sddk::wave_functions::inner": { - "avg": 6.54761904761905e-05, - "count": 84, - "max": 0.000165, - "min": 1e-06, - "total": 0.0055 - }, - "sddk::wave_functions::transform": { - "avg": 9.88596491228071e-05, - "count": 57, - "max": 0.000232, - "min": 1.3e-05, - "total": 0.005635 - }, - "sddk::wave_functions::transform|init": { - "avg": 5.01754385964913e-06, - "count": 57, - "max": 8.2e-05, + "sddk::transform|init": { + "avg": 2.813559322033896e-06, + "count": 59, + "max": 1e-05, "min": 0.0, - "total": 0.000286 + "total": 0.00016599999999999986 + }, + "sddk::transform|local": { + "avg": 1.15511811023622e-05, + "count": 127, + "max": 5.2e-05, + "min": 3e-06, + "total": 0.0014669999999999996 }, "sirius::Atom_type::init": { - "avg": 0.017798, + "avg": 0.019752, "count": 2, - "max": 0.018811, - "min": 0.016785, - "total": 0.035596 + "max": 0.019803, + "min": 0.019701, + "total": 0.039504 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.0073515, + "avg": 0.005488, "count": 2, - "max": 0.007622, - "min": 0.007081, - "total": 0.014703 - }, - "sirius::Band::diag_pseudo_potential": { - "avg": 0.0165627272727273, - "count": 11, - "max": 0.035488, - "min": 0.007715, - "total": 0.18219 + "max": 0.005533, + "min": 0.005443, + "total": 0.010976 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.0036569999999999992, + "count": 6, + "max": 0.004263, + "min": 0.003204, + "total": 0.021941999999999996 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.003644666666666667, + "count": 6, + "max": 0.004247, + 
"min": 0.003193, + "total": 0.021868000000000002 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.007172, + "count": 1, + "max": 0.007172, + "min": 0.007172, + "total": 0.007172 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.0164910909090909, + "avg": 0.013415545454545458, "count": 11, - "max": 0.035413, - "min": 0.00764, - "total": 0.181402 + "max": 0.032006, + "min": 0.006328, + "total": 0.14757100000000004 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 7.63636363636363e-06, + "avg": 2.1181818181818184e-05, "count": 11, - "max": 1e-05, - "min": 6e-06, - "total": 8.4e-05 + "max": 2.9e-05, + "min": 1.8e-05, + "total": 0.00023300000000000003 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.000292225806451613, - "count": 31, - "max": 0.000607, - "min": 0.00014, - "total": 0.009059 + "avg": 0.0004633124999999999, + "count": 32, + "max": 0.002537, + "min": 0.000135, + "total": 0.014825999999999997 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.0161244545454545, + "avg": 0.013084727272727274, "count": 11, - "max": 0.035056, - "min": 0.007244, - "total": 0.177369 + "max": 0.031677, + "min": 0.00598, + "total": 0.143932 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.000114636363636364, - "count": 11, - "max": 0.000227, - "min": 6.4e-05, - "total": 0.001261 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 1.91818181818182e-05, - "count": 11, - "max": 3.1e-05, - "min": 1.5e-05, - "total": 0.000211 - }, - "sirius::Band::get_h_diag": { - "avg": 0.000162181818181818, + "avg": 2.9636363636363638e-05, "count": 11, - "max": 0.0002, - "min": 0.000128, - "total": 0.001784 - }, - "sirius::Band::get_o_diag": { - "avg": 0.000153545454545455, - "count": 11, - "max": 0.000175, - "min": 0.000121, - "total": 0.001689 + "max": 5.9e-05, + "min": 2e-05, + "total": 0.000326 }, "sirius::Band::initialize_subspace": { - "avg": 0.008579, + 
"avg": 0.014666, "count": 1, - "max": 0.008579, - "min": 0.008579, - "total": 0.008579 + "max": 0.014666, + "min": 0.014666, + "total": 0.014666 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.007934, + "avg": 0.014269, "count": 1, - "max": 0.007934, - "min": 0.007934, - "total": 0.007934 + "max": 0.014269, + "min": 0.014269, + "total": 0.014269 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.000396, + "avg": 0.000212, "count": 1, - "max": 0.000396, - "min": 0.000396, - "total": 0.000396 + "max": 0.000212, + "min": 0.000212, + "total": 0.000212 }, "sirius::Band::residuals": { - "avg": 0.00026358064516129, - "count": 31, - "max": 0.000469, + "avg": 0.00026703125000000007, + "count": 32, + "max": 0.000384, "min": 0.0, - "total": 0.008171 + "total": 0.008545000000000002 }, "sirius::Band::residuals_aux": { - "avg": 0.00023528, - "count": 25, - "max": 0.000306, - "min": 0.000155, - "total": 0.005882 + "avg": 0.0002873461538461538, + "count": 26, + "max": 0.00035, + "min": 0.000248, + "total": 0.007470999999999999 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.000166704545454545, - "count": 44, - "max": 0.000349, - "min": 8.4e-05, - "total": 0.007335 - }, - "sirius::Band::solve_for_kset": { - "avg": 0.0170530909090909, + "avg": 0.00013151111111111112, + "count": 45, + "max": 0.000248, + "min": 2e-05, + "total": 0.0059180000000000005 + }, + "sirius::Band::solve": { + "avg": 0.013831727272727273, "count": 11, - "max": 0.035964, - "min": 0.008218, - "total": 0.187584 + "max": 0.03243, + "min": 0.006752, + "total": 0.152149 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000204, + "avg": 0.000274, "count": 1, - "max": 0.000204, - "min": 0.000204, - "total": 0.000204 + "max": 0.000274, + "min": 0.000274, + "total": 0.000274 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.0002, + "avg": 0.000224, "count": 1, - "max": 0.0002, - "min": 0.0002, - "total": 0.0002 + "max": 0.000224, + "min": 0.000224, + "total": 0.000224 }, 
"sirius::Beta_projectors_base::dismiss": { - "avg": 2.60869565217391e-07, - "count": 23, + "avg": 3.703703703703704e-07, + "count": 27, "max": 1e-06, "min": 0.0, - "total": 6e-06 + "total": 1e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 2.62790697674419e-05, - "count": 43, - "max": 3.6e-05, - "min": 2.3e-05, - "total": 0.00113 + "avg": 2.3076923076923076e-05, + "count": 13, + "max": 4e-05, + "min": 1.9e-05, + "total": 0.0003 }, "sirius::Beta_projectors_base::inner": { - "avg": 5.31162790697674e-05, - "count": 43, - "max": 9.3e-05, + "avg": 1.415517241379311e-05, + "count": 58, + "max": 3.6e-05, "min": 7e-06, - "total": 0.002284 + "total": 0.0008210000000000003 }, "sirius::Beta_projectors_base::prepare": { - "avg": 8.69565217391305e-07, - "count": 23, - "max": 6e-06, - "min": 0.0, - "total": 2e-05 + "avg": 2.4999999999999998e-06, + "count": 2, + "max": 3e-06, + "min": 2e-06, + "total": 4.9999999999999996e-06 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.00035, + "count": 1, + "max": 0.00035, + "min": 0.00035, + "total": 0.00035 }, "sirius::Broyden1::mix": { - "avg": 0.000164727272727273, + "avg": 0.001058, "count": 11, - "max": 0.000432, - "min": 1.2e-05, - "total": 0.001812 + "max": 0.002441, + "min": 7.2e-05, + "total": 0.011637999999999999 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.000514, + "avg": 0.000532, "count": 1, - "max": 0.000514, - "min": 0.000514, - "total": 0.000514 + "max": 0.000532, + "min": 0.000532, + "total": 0.000532 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 1.034754, + "avg": 0.450065, "count": 1, - "max": 1.034754, - "min": 1.034754, - "total": 1.034754 + "max": 0.450065, + "min": 0.450065, + "total": 0.450065 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 0.0936983636363636, + "avg": 0.040435454545454545, "count": 11, - "max": 0.111601, - "min": 0.079896, - "total": 1.030682 - }, - "sirius::DFT_ground_state::symmetrize": { - "avg": 0.0220868181818182, - 
"count": 22, - "max": 0.026572, - "min": 0.019499, - "total": 0.48591 + "max": 0.059229, + "min": 0.032126, + "total": 0.44478999999999996 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.000184181818181818, + "avg": 0.00014745454545454548, "count": 11, - "max": 0.000282, - "min": 0.000147, - "total": 0.002026 + "max": 0.000189, + "min": 0.000118, + "total": 0.0016220000000000002 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.00174572727272727, + "avg": 0.0011972727272727272, "count": 11, - "max": 0.002291, - "min": 0.00152, - "total": 0.019203 + "max": 0.001347, + "min": 0.001098, + "total": 0.013169999999999998 }, "sirius::Density::augment": { - "avg": 0.00249590909090909, + "avg": 0.0012806363636363636, "count": 11, - "max": 0.004303, - "min": 0.002087, - "total": 0.027455 + "max": 0.002578, + "min": 0.00107, + "total": 0.014086999999999999 }, "sirius::Density::generate": { - "avg": 0.004923, + "avg": 0.0029347272727272724, "count": 11, - "max": 0.006403, - "min": 0.004292, - "total": 0.054153 + "max": 0.00415, + "min": 0.002593, + "total": 0.032282 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.03127, + "avg": 0.000994, "count": 1, - "max": 0.03127, - "min": 0.03127, - "total": 0.03127 + "max": 0.000994, + "min": 0.000994, + "total": 0.000994 }, "sirius::Density::generate_rho_aug": { - "avg": 0.00242654545454546, + "avg": 0.0012136363636363636, "count": 11, - "max": 0.004203, - "min": 0.002028, - "total": 0.026692 + "max": 0.002508, + "min": 0.001014, + "total": 0.013349999999999999 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.000611181818181818, + "avg": 0.00017972727272727273, "count": 22, - "max": 0.002014, - "min": 0.000462, - "total": 0.013446 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.000101272727272727, - "count": 22, - "max": 0.000138, - "min": 7.6e-05, - "total": 0.002228 + "max": 0.001567, + "min": 9.6e-05, + "total": 0.003954 }, 
"sirius::Density::generate_rho_aug|sum": { - "avg": 0.000436727272727273, + "avg": 0.00027481818181818185, "count": 22, - "max": 0.000659, - "min": 0.000325, - "total": 0.009608 + "max": 0.000314, + "min": 0.000252, + "total": 0.006046 }, "sirius::Density::generate_valence": { - "avg": 0.00491854545454545, + "avg": 0.0029300909090909095, "count": 11, - "max": 0.006395, - "min": 0.004288, - "total": 0.054104 + "max": 0.004143, + "min": 0.002588, + "total": 0.032231 }, "sirius::Density::initial_density": { - "avg": 0.038793, + "avg": 0.002829, "count": 1, - "max": 0.038793, - "min": 0.038793, - "total": 0.038793 + "max": 0.002829, + "min": 0.002829, + "total": 0.002829 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.000452090909090909, + "avg": 0.00040354545454545455, "count": 11, - "max": 0.000748, + "max": 0.000486, + "min": 0.000375, + "total": 0.004439 + }, + "sirius::Density::update": { + "avg": 0.001022, + "count": 1, + "max": 0.001022, + "min": 0.001022, + "total": 0.001022 + }, + "sirius::Field4D::symmetrize": { + "avg": 0.002141363636363636, + "count": 22, + "max": 0.002597, + "min": 0.002021, + "total": 0.04711 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.000976, + "count": 1, + "max": 0.000976, + "min": 0.000976, + "total": 0.000976 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.000573, + "count": 1, + "max": 0.000573, + "min": 0.000573, + "total": 0.000573 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.000529, + "count": 1, + "max": 0.000529, + "min": 0.000529, + "total": 0.000529 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.000374, + "count": 1, + "max": 0.000374, "min": 0.000374, - "total": 0.004973 + "total": 0.000374 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.0033, + "count": 1, + "max": 0.0033, + "min": 0.0033, + "total": 0.0033 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.000554, + "count": 1, + "max": 0.000554, + "min": 0.000554, + "total": 0.000554 }, 
"sirius::Hamiltonian::apply_h_s": { - "avg": 0.00477925, - "count": 32, - "max": 0.007662, - "min": 0.000753, - "total": 0.152936 + "avg": 0.0034669696969696975, + "count": 33, + "max": 0.005682, + "min": 0.000531, + "total": 0.11441000000000001 + }, + "sirius::Hamiltonian::get_h_diag": { + "avg": 0.0001420909090909091, + "count": 11, + "max": 0.000165, + "min": 0.000125, + "total": 0.001563 + }, + "sirius::Hamiltonian::get_o_diag": { + "avg": 0.0001389090909090909, + "count": 11, + "max": 0.000167, + "min": 0.000122, + "total": 0.001528 }, "sirius::K_point::K_point": { - "avg": 1e-06, + "avg": 3e-06, "count": 1, - "max": 1e-06, - "min": 1e-06, - "total": 1e-06 + "max": 3e-06, + "min": 3e-06, + "total": 3e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.000251, + "avg": 0.000213, "count": 1, - "max": 0.000251, - "min": 0.000251, - "total": 0.000251 + "max": 0.000213, + "min": 0.000213, + "total": 0.000213 }, "sirius::K_point::initialize": { - "avg": 0.000505, + "avg": 0.000758, "count": 1, - "max": 0.000505, - "min": 0.000505, - "total": 0.000505 + "max": 0.000758, + "min": 0.000758, + "total": 0.000758 }, - "sirius::K_point_set::K_point_set": { - "avg": 0.020849, + "sirius::K_point::update": { + "avg": 0.000517, "count": 1, - "max": 0.020849, - "min": 0.020849, - "total": 0.020849 + "max": 0.000517, + "min": 0.000517, + "total": 0.000517 }, "sirius::K_point_set::add_kpoint": { - "avg": 1.1e-05, + "avg": 1.5e-05, "count": 1, - "max": 1.1e-05, - "min": 1.1e-05, - "total": 1.1e-05 + "max": 1.5e-05, + "min": 1.5e-05, + "total": 1.5e-05 + }, + "sirius::K_point_set::create_k_mesh": { + "avg": 0.054623, + "count": 1, + "max": 0.054623, + "min": 0.054623, + "total": 0.054623 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 1.30909090909091e-05, + "avg": 7.3636363636363614e-06, "count": 11, - "max": 3.1e-05, - "min": 5e-06, - "total": 0.000144 + "max": 1.7e-05, + "min": 3e-06, + "total": 8.099999999999998e-05 + }, + "sirius::K_point_set::initialize": { + 
"avg": 0.000769, + "count": 1, + "max": 0.000769, + "min": 0.000769, + "total": 0.000769 }, "sirius::K_point_set::sync_band_energies": { - "avg": 3.09090909090909e-06, + "avg": 3e-06, "count": 11, - "max": 4e-06, + "max": 5e-06, "min": 2e-06, - "total": 3.4e-05 + "total": 3.3e-05 }, "sirius::Local_operator::apply_h": { - "avg": 0.004505625, - "count": 32, - "max": 0.007325, - "min": 0.000634, - "total": 0.14418 + "avg": 0.003302, + "count": 33, + "max": 0.005487, + "min": 0.000396, + "total": 0.108966 }, "sirius::Local_operator::prepare": { - "avg": 0.000235333333333333, + "avg": 0.00016312499999999997, "count": 24, - "max": 0.000618, - "min": 7e-06, - "total": 0.005648 + "max": 0.000398, + "min": 6e-06, + "total": 0.003914999999999999 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 9.58333333333333e-07, + "avg": 9.166666666666667e-07, "count": 24, "max": 2e-06, "min": 0.0, - "total": 2.3e-05 + "total": 2.2000000000000003e-05 }, "sirius::Non_local_operator::apply": { - "avg": 8.2453125e-05, - "count": 64, - "max": 0.000129, - "min": 3.3e-05, - "total": 0.005277 + "avg": 6.419696969696973e-05, + "count": 66, + "max": 9.3e-05, + "min": 4.7e-05, + "total": 0.004237000000000002 }, "sirius::Periodic_function::add": { - "avg": 7.4875e-05, + "avg": 6.358333333333334e-05, "count": 24, - "max": 0.000117, - "min": 4.5e-05, - "total": 0.001797 + "max": 9.5e-05, + "min": 5.3e-05, + "total": 0.0015260000000000002 }, "sirius::Periodic_function::inner": { - "avg": 0.000159355555555556, - "count": 225, - "max": 0.000352, - "min": 8.7e-05, - "total": 0.035855 + "avg": 6.87319587628866e-05, + "count": 97, + "max": 0.000107, + "min": 5.5e-05, + "total": 0.006667000000000001 }, "sirius::Periodic_function::integrate": { - "avg": 0.000117909090909091, - "count": 11, - "max": 0.000153, - "min": 8.6e-05, - "total": 0.001297 + "avg": 7.15e-05, + "count": 12, + "max": 0.00011, + "min": 5.4e-05, + "total": 0.000858 }, "sirius::Potential::Potential": { - "avg": 0.409009, + 
"avg": 0.003683, "count": 1, - "max": 0.409009, - "min": 0.409009, - "total": 0.409009 + "max": 0.003683, + "min": 0.003683, + "total": 0.003683 }, "sirius::Potential::generate": { - "avg": 0.0221041666666667, + "avg": 0.014817583333333335, "count": 12, - "max": 0.026232, - "min": 0.020916, - "total": 0.26525 + "max": 0.016143, + "min": 0.013946, + "total": 0.17781100000000002 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.00194875, + "avg": 0.0011588333333333335, "count": 12, - "max": 0.002617, - "min": 0.001727, - "total": 0.023385 + "max": 0.001263, + "min": 0.001097, + "total": 0.013906000000000002 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 0.0161044166666667, + "avg": 0.010035750000000001, "count": 12, - "max": 0.016761, - "min": 0.015122, - "total": 0.193253 + "max": 0.010462, + "min": 0.009457, + "total": 0.12042900000000002 }, "sirius::Potential::generate_local_potential": { - "avg": 0.408527, + "avg": 0.001995, "count": 1, - "max": 0.408527, - "min": 0.408527, - "total": 0.408527 - }, - "sirius::Potential::init": { - "avg": 4e-06, - "count": 1, - "max": 4e-06, - "min": 4e-06, - "total": 4e-06 + "max": 0.001995, + "min": 0.001995, + "total": 0.001995 }, "sirius::Potential::poisson": { - "avg": 0.000935083333333333, + "avg": 0.0006388333333333334, "count": 12, - "max": 0.001052, - "min": 0.000806, - "total": 0.011221 + "max": 0.000754, + "min": 0.000583, + "total": 0.007666 + }, + "sirius::Potential::update": { + "avg": 0.00201, + "count": 1, + "max": 0.00201, + "min": 0.00201, + "total": 0.00201 }, "sirius::Potential::xc": { - "avg": 0.00217275, + "avg": 0.0023838333333333333, "count": 12, - "max": 0.004869, - "min": 0.001668, - "total": 0.026073 + "max": 0.004, + "min": 0.002064, + "total": 0.028606 }, "sirius::Potential::xc_mt_nonmagnetic": { - "avg": 0.0006804375, + "avg": 0.0005437708333333333, "count": 48, - "max": 0.00087, - "min": 0.000523, - "total": 0.032661 + "max": 0.000628, + "min": 0.000463, + 
"total": 0.026101 }, "sirius::Potential::xc_rg_nonmagnetic": { - "avg": 0.00216591666666667, + "avg": 0.002379333333333333, "count": 12, - "max": 0.004863, - "min": 0.001661, - "total": 0.025991 + "max": 0.003992, + "min": 0.002059, + "total": 0.028551999999999998 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.048054, - "count": 1, - "max": 0.048054, - "min": 0.048054, - "total": 0.048054 + "avg": 0.069627, + "count": 2, + "max": 0.071044, + "min": 0.06821, + "total": 0.139254 }, "sirius::Radial_integrals|aug": { - "avg": 0.891279, - "count": 1, - "max": 0.891279, - "min": 0.891279, - "total": 0.891279 + "avg": 0.8020525000000001, + "count": 2, + "max": 0.840332, + "min": 0.763773, + "total": 1.6041050000000001 }, "sirius::Radial_integrals|beta": { - "avg": 0.1013135, + "avg": 0.108039, "count": 2, - "max": 0.105756, - "min": 0.096871, - "total": 0.202627 + "max": 0.111039, + "min": 0.105039, + "total": 0.216078 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.030279, - "count": 1, - "max": 0.030279, - "min": 0.030279, - "total": 0.030279 + "avg": 0.0444115, + "count": 2, + "max": 0.046712, + "min": 0.042111, + "total": 0.088823 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.035409, + "avg": 0.035736, "count": 1, - "max": 0.035409, - "min": 0.035409, - "total": 0.035409 + "max": 0.035736, + "min": 0.035736, + "total": 0.035736 }, "sirius::Radial_integrals|vloc": { - "avg": 0.406946, - "count": 1, - "max": 0.406946, - "min": 0.406946, - "total": 0.406946 + "avg": 0.1375605, + "count": 2, + "max": 0.148243, + "min": 0.126878, + "total": 0.275121 }, - "sirius::Simulation_context::initialize": { - "avg": 1.226144, + "sirius::Simulation_context::init_atoms_to_grid_idx": { + "avg": 0.001405, "count": 1, - "max": 1.226144, - "min": 1.226144, - "total": 1.226144 + "max": 0.001405, + "min": 0.001405, + "total": 0.001405 }, - "sirius::Simulation_context_base::init_atoms_to_grid_idx": { - "avg": 0.001789, + 
"sirius::Simulation_context::init_comm": { + "avg": 0.000252, "count": 1, - "max": 0.001789, - "min": 0.001789, - "total": 0.001789 + "max": 0.000252, + "min": 0.000252, + "total": 0.000252 }, - "sirius::Simulation_context_base::initialize": { - "avg": 1.211421, + "sirius::Simulation_context::init_fft": { + "avg": 0.019242, "count": 1, - "max": 1.211421, - "min": 1.211421, - "total": 1.211421 + "max": 0.019242, + "min": 0.019242, + "total": 0.019242 }, - "sirius::Simulation_context_base::make_periodic_function": { - "avg": 0.000252666666666667, - "count": 3, - "max": 0.000313, - "min": 0.000193, - "total": 0.000758 + "sirius::Simulation_context::initialize": { + "avg": 2.546791, + "count": 1, + "max": 2.546791, + "min": 2.546791, + "total": 2.546791 + }, + "sirius::Simulation_context::make_periodic_function": { + "avg": 0.0002375, + "count": 6, + "max": 0.000399, + "min": 0.000111, + "total": 0.001425 + }, + "sirius::Simulation_context::update": { + "avg": 0.069849, + "count": 1, + "max": 0.069849, + "min": 0.069849, + "total": 0.069849 }, "sirius::Simulation_parameters::import": { + "avg": 0.000184, + "count": 1, + "max": 0.000184, + "min": 0.000184, + "total": 0.000184 + }, + "sirius::Smooth_periodic_function::fft_transform": { + "avg": 0.00039653333333333323, + "count": 90, + "max": 0.001542, + "min": 0.000164, + "total": 0.03568799999999999 + }, + "sirius::Smooth_periodic_function::gather_f_pw": { + "avg": 1.8e-05, + "count": 2, + "max": 2e-05, + "min": 1.6e-05, + "total": 3.6e-05 + }, + "sirius::Smooth_periodic_function|inner": { + "avg": 6.542105263157898e-05, + "count": 133, + "max": 0.000104, + "min": 5.4e-05, + "total": 0.008701000000000004 + }, + "sirius::Stress|ewald": { + "avg": 0.000553, + "count": 1, + "max": 0.000553, + "min": 0.000553, + "total": 0.000553 + }, + "sirius::Stress|har": { "avg": 0.00019, "count": 1, "max": 0.00019, "min": 0.00019, "total": 0.00019 }, - "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.000577205479452055, 
- "count": 73, - "max": 0.001334, - "min": 0.000256, - "total": 0.042136 + "sirius::Stress|kin": { + "avg": 0.000154, + "count": 1, + "max": 0.000154, + "min": 0.000154, + "total": 0.000154 }, - "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 3.3e-05, + "sirius::Stress|nonloc": { + "avg": 0.001371, + "count": 1, + "max": 0.001371, + "min": 0.001371, + "total": 0.001371 + }, + "sirius::Stress|us": { + "avg": 0.037227, + "count": 1, + "max": 0.037227, + "min": 0.037227, + "total": 0.037227 + }, + "sirius::Stress|us|gemm": { + "avg": 0.00030250000000000003, + "count": 18, + "max": 0.00035, + "min": 0.000279, + "total": 0.005445000000000001 + }, + "sirius::Stress|us|phase_fac": { + "avg": 7.95e-05, "count": 2, - "max": 3.4e-05, - "min": 3.2e-05, - "total": 6.6e-05 + "max": 8.9e-05, + "min": 7e-05, + "total": 0.000159 + }, + "sirius::Stress|us|prepare": { + "avg": 8.316666666666668e-05, + "count": 18, + "max": 0.000105, + "min": 7.1e-05, + "total": 0.0014970000000000003 }, - "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.000327, + "sirius::Stress|vloc": { + "avg": 0.000695, "count": 1, - "max": 0.000327, - "min": 0.000327, - "total": 0.000327 + "max": 0.000695, + "min": 0.000695, + "total": 0.000695 + }, + "sirius::Unit_cell::find_nearest_neighbours": { + "avg": 0.00024, + "count": 2, + "max": 0.000277, + "min": 0.000203, + "total": 0.00048 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.020892, - "count": 1, - "max": 0.020892, - "min": 0.020892, - "total": 0.020892 + "avg": 0.056532, + "count": 2, + "max": 0.057743, + "min": 0.055321, + "total": 0.113064 }, "sirius::Unit_cell::initialize": { - "avg": 0.056843, + "avg": 0.097561, "count": 1, - "max": 0.056843, - "min": 0.056843, - "total": 0.056843 + "max": 0.097561, + "min": 0.097561, + "total": 0.097561 + }, + "sirius::Unit_cell::update": { + "avg": 0.056787500000000005, + "count": 2, + "max": 0.058028, + "min": 0.055547, + "total": 0.11357500000000001 }, 
"sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.020862, - "count": 1, - "max": 0.020862, - "min": 0.020862, - "total": 0.020862 + "avg": 0.056471499999999994, + "count": 2, + "max": 0.057689, + "min": 0.055254, + "total": 0.11294299999999999 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.020599, - "count": 1, - "max": 0.020599, - "min": 0.020599, - "total": 0.020599 + "avg": 0.056236999999999995, + "count": 2, + "max": 0.057426, + "min": 0.055048, + "total": 0.11247399999999999 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 3.7e-05, - "count": 1, - "max": 3.7e-05, - "min": 3.7e-05, - "total": 3.7e-05 + "avg": 4.9e-05, + "count": 2, + "max": 7.1e-05, + "min": 2.7e-05, + "total": 9.8e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.000174, - "count": 1, - "max": 0.000174, - "min": 0.000174, - "total": 0.000174 + "avg": 0.0001345, + "count": 2, + "max": 0.000149, + "min": 0.00012, + "total": 0.000269 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 4.2e-05, - "count": 1, - "max": 4.2e-05, - "min": 4.2e-05, - "total": 4.2e-05 + "avg": 1.95e-05, + "count": 2, + "max": 2.9e-05, + "min": 1e-05, + "total": 3.9e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.0220824090909091, + "avg": 0.002137636363636364, "count": 22, - "max": 0.026567, - "min": 0.019494, - "total": 0.485813 + "max": 0.002591, + "min": 0.002017, + "total": 0.04702800000000001 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.0216236363636364, + "avg": 0.0018236363636363634, "count": 22, - "max": 0.026041, - "min": 0.019089, - "total": 0.47572 + "max": 0.002167, + "min": 0.001728, + "total": 0.040119999999999996 } } } \ No newline at end of file diff --git a/verification/test4/sirius.json b/verification/test4/sirius.json index 8b3236590..a598075ef 100644 --- a/verification/test4/sirius.json +++ b/verification/test4/sirius.json @@ -3,7 +3,9 @@ 
"!cyclic_block_size" : 2, "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", - "gen_evp_solver_type" : "lapack" + "gen_evp_solver_type" : "lapack", + "print_forces" : true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test5/output_ref.json b/verification/test5/output_ref.json index a0e710217..ecc4f2a32 100644 --- a/verification/test5/output_ref.json +++ b/verification/test5/output_ref.json @@ -1,890 +1,1054 @@ { - "build_date": "Mon, 8 Oct 2018 10:57:13", "comm_world_size": 1, "counters": { "band_evp_work_count": 4969.282414003666, "local_operator_num_applied": 13082 }, - "git_hash": "42583bb550104a0c716f903542cfd02c12f9ce3d", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, - "band_gap": 0.02854722133519194, + "band_gap": 0.028547221335633477, "chemical_formula": "Ni2O2", "converged": true, "core_leakage": 0.0, - "efermi": 0.5181972923628417, + "efermi": 0.5181972923632456, "energy": { - "bxc": -0.04455957554707504, + "bxc": -0.044559575548275575, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -24.383880821087054, + "eval_sum": -24.383880821074793, "ewald": -238.91986640822148, - "exc": -42.21059571330334, - "kin": 116.57128690189715, - "total": -375.37312388882003, - "veff": -140.91060814743713, - "vha": 246.39046474553072, - "vxc": -53.291891851010085 + "exc": -42.210595713309075, + "kin": 116.57128690190817, + "total": -375.3731238888219, + "veff": -140.9106081474347, + "vha": 246.39046474556187, + "vxc": -53.2918918510161 }, "fft_coarse_grid": [40,40,40], "fft_grid": [64,64,64], + "forces": [ + [-2.3696676421475333e-13,7.212964508425352e-14,-7.616721824095337e-14], + [-1.456389170638043e-13,1.6344740751024835e-13,1.514515745515369e-14], + [-2.3036086424211167e-14,1.3160127116871717e-14,-1.1336934322262362e-14], + [-1.3537056076465668e-14,2.266268487577066e-14,-1.833079016670026e-15] + ], "mpi_grid": [1,1], "num_atoms": 4, "num_bands": 34, "num_fv_states": -1, 
"num_scf_iterations": 19, "omega": 245.9188061410677, - "pw_cutoff": 20.0 + "pw_cutoff": 20.0, + "stress": [ + [0.0010200133751754152,0.0001779979038823611,0.00017799790388236096], + [0.0001779979038823611,0.0010200133751754152,0.00017799790388236096], + [0.00017799790388236096,0.00017799790388236096,0.0010200133751755228] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.0018933778409090907, + "Eigensolver_lapack|zheevr": { + "avg": 0.001551340909090911, "count": 352, - "max": 0.004835, - "min": 0.000777, - "total": 0.666469 + "max": 0.002669, + "min": 0.000654, + "total": 0.5460720000000007 }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.0018836505681818182, - "count": 352, - "max": 0.004823, - "min": 0.00077, - "total": 0.663045 + "Eigensolver_lapack|zhegvx": { + "avg": 0.0007628750000000002, + "count": 168, + "max": 0.001158, + "min": 0.000622, + "total": 0.12816300000000003 }, "sddk::FFT3D::FFT3D": { - "avg": 0.010809, + "avg": 0.0029615, "count": 2, - "max": 0.017196, - "min": 0.004422, - "total": 0.021618 + "max": 0.003605, + "min": 0.002318, + "total": 0.005923 }, "sddk::FFT3D::prepare": { - "avg": 5.616019417475728e-05, + "avg": 6.678640776699033e-05, "count": 206, - "max": 0.000261, - "min": 3.8e-05, - "total": 0.011569 + "max": 0.000224, + "min": 4.7e-05, + "total": 0.013758000000000008 }, "sddk::FFT3D::prepare|cpu": { - "avg": 5.2951456310679626e-05, + "avg": 6.14271844660194e-05, "count": 206, - "max": 0.000258, - "min": 3.6e-05, - "total": 0.010908000000000003 + "max": 0.000196, + "min": 4.4e-05, + "total": 0.012653999999999997 }, "sddk::FFT3D::transform": { - "avg": 0.0009832442027565454, - "count": 30618, - "max": 0.00733, - "min": 0.00038, - "total": 30.104970999999903 + "avg": 0.0003571653551020425, + "count": 30625, + "max": 0.002892, + "min": 0.000198, + "total": 10.938189000000051 }, "sddk::FFT3D::transform_xy": { - "avg": 0.0002790748252661879, - "count": 30618, - "max": 0.001133, - 
"min": 0.000173, - "total": 8.54471300000014 + "avg": 0.00025718664489796263, + "count": 30625, + "max": 0.001888, + "min": 0.000131, + "total": 7.876341000000106 }, "sddk::FFT3D::transform_z": { - "avg": 0.0006982933241883822, - "count": 30618, - "max": 0.006771, - "min": 0.000124, - "total": 21.380344999999885 + "avg": 9.259157551020432e-05, + "count": 30625, + "max": 0.000973, + "min": 4.8e-05, + "total": 2.835617000000007 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0006955718204977431, - "count": 30618, - "max": 0.006767, - "min": 0.000121, - "total": 21.2970179999999 + "avg": 8.945018775510235e-05, + "count": 30625, + "max": 0.000969, + "min": 4.5e-05, + "total": 2.7394120000000095 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0006926874061009788, - "count": 30618, - "max": 0.006761, - "min": 0.000119, - "total": 21.20870299999977 + "avg": 8.58998530612253e-05, + "count": 30625, + "max": 0.000911, + "min": 4.2e-05, + "total": 2.6306830000000248 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.0004959166666666667, + "avg": 0.0004534166666666667, "count": 12, - "max": 0.001804, - "min": 0.000162, - "total": 0.0059510000000000006 + "max": 0.001453, + "min": 0.000163, + "total": 0.0054410000000000005 }, "sddk::Gvec::init": { - "avg": 0.0027938333333333335, + "avg": 0.0016328333333333334, "count": 6, - "max": 0.011744, - "min": 0.000609, - "total": 0.016763 + "max": 0.006319, + "min": 0.000465, + "total": 0.009797 }, - "sddk::Wave_functions::inner": { - "avg": 0.00023625359195402393, + "sddk::inner": { + "avg": 0.00022001652298850538, "count": 1392, - "max": 0.000826, - "min": 4e-06, - "total": 0.3288650000000013 - }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.001305931818181818, - "count": 352, - "max": 0.00317, - "min": 0.000168, - "total": 0.45968799999999993 - }, - "sddk::Wave_functions::transform": { - "avg": 0.0004964613636363641, - "count": 880, - "max": 0.0016, - "min": 8.4e-05, - "total": 0.43688600000000044 + "max": 0.000912, + 
"min": 5e-06, + "total": 0.3062629999999995 }, - "sddk::Wave_functions::transform|init": { - "avg": 3.6851136363636376e-05, - "count": 880, - "max": 0.000527, - "min": 2e-06, - "total": 0.03242900000000001 + "sddk::inner|local": { + "avg": 0.00021618821839080438, + "count": 1392, + "max": 0.000903, + "min": 3e-06, + "total": 0.3009339999999997 }, "sddk::matrix_storage::matrix_storage": { - "avg": 4.718253968254028e-06, + "avg": 1.1051587301587389e-06, "count": 504, - "max": 0.000288, + "max": 1e-05, "min": 0.0, - "total": 0.00237800000000003 + "total": 0.0005570000000000044 }, "sddk::matrix_storage::remap_backward": { - "avg": 7.461538461538516e-07, + "avg": 1.146153846153857e-06, "count": 520, - "max": 2e-06, + "max": 1.2e-05, "min": 0.0, - "total": 0.0003880000000000028 + "total": 0.0005960000000000056 }, "sddk::matrix_storage::remap_forward": { - "avg": 2.063235294117662e-06, + "avg": 4.52499999999998e-06, "count": 680, - "max": 2.2e-05, + "max": 1.8e-05, + "min": 1e-06, + "total": 0.0030769999999999864 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 1.0675000000000033e-06, + "count": 1200, + "max": 3e-06, "min": 0.0, - "total": 0.0014030000000000101 + "total": 0.001281000000000004 + }, + "sddk::orthogonalize": { + "avg": 0.0008731846590909094, + "count": 352, + "max": 0.002067, + "min": 0.000183, + "total": 0.3073610000000001 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 3.304545454545454e-05, + "count": 352, + "max": 0.000124, + "min": 2e-06, + "total": 0.011631999999999998 + }, + "sddk::orthogonalize|transform": { + "avg": 0.00010398863636363635, + "count": 352, + "max": 0.000314, + "min": 8e-06, + "total": 0.036604 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.007868, + "avg": 0.003997, "count": 1, - "max": 0.007868, - "min": 0.007868, - "total": 0.007868 + "max": 0.003997, + "min": 0.003997, + "total": 0.003997 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.0019257875000000002, + "avg": 0.00030877499999999996, "count": 80, - 
"max": 0.003052, - "min": 0.001732, - "total": 0.154063 + "max": 0.000508, + "min": 0.000273, + "total": 0.024701999999999995 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.0019299625000000005, + "avg": 0.00033256249999999996, "count": 80, - "max": 0.002869, - "min": 0.001734, - "total": 0.15439700000000003 + "max": 0.00055, + "min": 0.000286, + "total": 0.026604999999999997 + }, + "sddk::transform": { + "avg": 0.0002906181818181819, + "count": 880, + "max": 0.000922, + "min": 6.5e-05, + "total": 0.2557440000000001 + }, + "sddk::transform|init": { + "avg": 3.976250000000002e-05, + "count": 880, + "max": 0.0003, + "min": 2e-06, + "total": 0.034991000000000015 + }, + "sddk::transform|local": { + "avg": 0.00011023148148148188, + "count": 1944, + "max": 0.000428, + "min": 2.5e-05, + "total": 0.21429000000000076 }, "sirius::Atom_type::init": { - "avg": 0.0191225, + "avg": 0.017808, "count": 2, - "max": 0.025189, - "min": 0.013056, - "total": 0.038245 + "max": 0.023092, + "min": 0.012524, + "total": 0.035616 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.077852, + "avg": 0.0784895, "count": 2, - "max": 0.085427, - "min": 0.070277, - "total": 0.155704 + "max": 0.089549, + "min": 0.06743, + "total": 0.156979 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.05287475, + "count": 12, + "max": 0.071238, + "min": 0.036813, + "total": 0.634497 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.04703516666666666, + "count": 12, + "max": 0.071218, + "min": 0.03081, + "total": 0.564422 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.065878, + "count": 1, + "max": 0.065878, + "min": 0.065878, + "total": 0.065878 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.35630491250000007, + "avg": 0.15917823749999999, "count": 80, - "max": 0.55728, - "min": 0.161479, - "total": 28.504393000000004 + "max": 0.22779, + "min": 0.064372, + "total": 
12.734259 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 0.0001090125, + "avg": 2.5512499999999997e-05, "count": 80, - "max": 0.000367, - "min": 8.5e-05, - "total": 0.008721 + "max": 6.5e-05, + "min": 1.7e-05, + "total": 0.002041 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.0015608339843750005, + "avg": 0.0013085898437499995, "count": 512, - "max": 0.004838, - "min": 0.000733, - "total": 0.7991470000000003 + "max": 0.002673, + "min": 0.000626, + "total": 0.6699979999999998 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.35333570000000003, + "avg": 0.15677517500000002, "count": 80, - "max": 0.554499, - "min": 0.15858, - "total": 28.266856 + "max": 0.225769, + "min": 0.062138, + "total": 12.542014000000002 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.00047816249999999976, + "avg": 0.00025276875, "count": 160, - "max": 0.001081, - "min": 0.00021, - "total": 0.07650599999999996 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 1.5949999999999998e-05, - "count": 80, - "max": 4.5e-05, - "min": 1.2e-05, - "total": 0.0012759999999999998 + "max": 0.000455, + "min": 0.000118, + "total": 0.040443 }, "sirius::Band::initialize_subspace": { - "avg": 1.072287, + "avg": 0.232624, "count": 1, - "max": 1.072287, - "min": 1.072287, - "total": 1.072287 + "max": 0.232624, + "min": 0.232624, + "total": 0.232624 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.26656725000000003, + "avg": 0.057749499999999995, "count": 4, - "max": 0.274488, - "min": 0.261585, - "total": 1.0662690000000001 + "max": 0.060134, + "min": 0.05633, + "total": 0.23099799999999998 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.12332675000000001, + "avg": 0.00064375, "count": 4, - "max": 0.127584, - "min": 0.120211, - "total": 0.49330700000000005 + "max": 0.00076, + "min": 0.000586, + "total": 0.002575 }, "sirius::Band::residuals": { - "avg": 0.0004898808593750002, + "avg": 
0.00045164062500000065, "count": 512, - "max": 0.001547, + "max": 0.001175, "min": 0.0, - "total": 0.2508190000000001 + "total": 0.23124000000000033 }, "sirius::Band::residuals_aux": { - "avg": 0.0002780666666666665, + "avg": 0.0003922166666666666, "count": 360, - "max": 0.000758, - "min": 0.000201, - "total": 0.10010399999999996 + "max": 0.000601, + "min": 0.00026, + "total": 0.141198 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.0003656845930232559, + "avg": 0.00039313226744186025, "count": 688, - "max": 0.001076, - "min": 0.000161, - "total": 0.25159100000000006 + "max": 0.001009, + "min": 0.000199, + "total": 0.27047499999999985 }, "sirius::Band::solve": { - "avg": 1.4293161500000002, + "avg": 0.6385474, "count": 20, - "max": 2.151128, - "min": 0.662009, - "total": 28.586323000000004 + "max": 0.900482, + "min": 0.268338, + "total": 12.770947999999999 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.00054875, + "avg": 0.0013865, "count": 4, - "max": 0.000589, - "min": 0.000511, - "total": 0.002195 + "max": 0.001538, + "min": 0.001241, + "total": 0.005546 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00054475, + "avg": 0.00066925, "count": 4, - "max": 0.000586, - "min": 0.000508, - "total": 0.002179 + "max": 0.000849, + "min": 0.000554, + "total": 0.002677 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 2.9268292682926804e-07, - "count": 164, - "max": 1e-06, + "avg": 4.1111111111111054e-07, + "count": 180, + "max": 7e-06, "min": 0.0, - "total": 4.799999999999996e-05 + "total": 7.39999999999999e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.0002917983333333336, - "count": 600, - "max": 0.001111, - "min": 0.000267, - "total": 0.17507900000000018 + "avg": 0.0003205576923076923, + "count": 52, + "max": 0.000784, + "min": 0.000253, + "total": 0.016669 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.0003889838235294111, - "count": 680, - "max": 0.000699, - "min": 5.9e-05, - "total": 0.26450899999999955 + 
"avg": 0.0003601919191919186, + "count": 792, + "max": 0.00087, + "min": 7.3e-05, + "total": 0.2852719999999995 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 0.0003558876262626261, + "count": 792, + "max": 0.00086, + "min": 6.8e-05, + "total": 0.28186299999999986 }, "sirius::Beta_projectors_base::prepare": { - "avg": 3.2987804878048953e-06, - "count": 164, - "max": 0.000416, - "min": 0.0, - "total": 0.0005410000000000029 + "avg": 9.874999999999998e-06, + "count": 8, + "max": 6.3e-05, + "min": 1e-06, + "total": 7.899999999999998e-05 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.0030110000000000002, + "count": 4, + "max": 0.004348, + "min": 0.0024, + "total": 0.012044000000000001 }, "sirius::Broyden1::mix": { - "avg": 0.0031344500000000004, + "avg": 0.0020570500000000004, "count": 20, - "max": 0.005573, - "min": 7e-05, - "total": 0.06268900000000001 + "max": 0.003505, + "min": 6.5e-05, + "total": 0.041141000000000004 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.066896, + "avg": 0.000415, "count": 1, - "max": 0.066896, - "min": 0.066896, - "total": 0.066896 + "max": 0.000415, + "min": 0.000415, + "total": 0.000415 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 56.791583, + "avg": 17.779545, "count": 1, - "max": 56.791583, - "min": 56.791583, - "total": 56.791583 + "max": 17.779545, + "min": 17.779545, + "total": 17.779545 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 2.8373149, + "avg": 0.8885611499999999, "count": 20, - "max": 3.532232, - "min": 1.943627, - "total": 56.746298 + "max": 1.130562, + "min": 0.520594, + "total": 17.771223 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0016235374999999993, + "avg": 0.0010728499999999998, "count": 80, - "max": 0.002907, - "min": 0.00146, - "total": 0.12988299999999994 + "max": 0.0015790000000000001, + "min": 0.000909, + "total": 0.08582799999999999 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 
0.057968662500000025, + "avg": 0.02705663749999999, "count": 80, - "max": 0.070257, - "min": 0.052103, - "total": 4.637493000000002 + "max": 0.0363, + "min": 0.021733, + "total": 2.1645309999999993 }, "sirius::Density::augment": { - "avg": 0.19360554999999996, + "avg": 0.05906080000000001, "count": 20, - "max": 0.212789, - "min": 0.186608, - "total": 3.8721109999999994 + "max": 0.065764, + "min": 0.054635, + "total": 1.1812160000000003 }, "sirius::Density::compute_atomic_mag_mom": { - "avg": 0.000299, + "avg": 0.000364, "count": 1, - "max": 0.000299, - "min": 0.000299, - "total": 0.000299 + "max": 0.000364, + "min": 0.000364, + "total": 0.000364 }, "sirius::Density::generate": { - "avg": 0.43594835000000004, + "avg": 0.17274649999999997, "count": 20, - "max": 0.476713, - "min": 0.422259, - "total": 8.718967000000001 + "max": 0.211005, + "min": 0.157904, + "total": 3.4549299999999996 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.072073, + "avg": 0.001672, "count": 1, - "max": 0.072073, - "min": 0.072073, - "total": 0.072073 + "max": 0.001672, + "min": 0.001672, + "total": 0.001672 }, "sirius::Density::generate_rho_aug": { - "avg": 0.19320195, + "avg": 0.058748199999999993, "count": 20, - "max": 0.212312, - "min": 0.186203, - "total": 3.864039 + "max": 0.06539, + "min": 0.054467, + "total": 1.174964 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.009252312500000002, + "avg": 0.009956349999999997, "count": 80, - "max": 0.030564, - "min": 0.002294, - "total": 0.7401850000000001 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.0684988, - "count": 40, - "max": 0.087115, - "min": 0.063569, - "total": 2.7399519999999997 + "max": 0.03347, + "min": 0.00151, + "total": 0.7965079999999998 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.0026073750000000003, + "avg": 0.0022510625, "count": 80, - "max": 0.004417, - "min": 0.001453, - "total": 0.20859000000000003 + "max": 0.004518, + "min": 0.001234, + "total": 
0.180085 }, "sirius::Density::generate_valence": { - "avg": 0.43594374999999996, + "avg": 0.17274035, "count": 20, - "max": 0.476709, - "min": 0.422255, - "total": 8.718874999999999 + "max": 0.211, + "min": 0.157899, + "total": 3.4548069999999997 }, "sirius::Density::initial_density": { - "avg": 0.083949, + "avg": 0.005897, "count": 1, - "max": 0.083949, - "min": 0.083949, - "total": 0.083949 + "max": 0.005897, + "min": 0.005897, + "total": 0.005897 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.00367855, + "avg": 0.0033550999999999997, "count": 20, - "max": 0.005897, - "min": 0.003199, - "total": 0.073571 + "max": 0.003942, + "min": 0.003066, + "total": 0.067102 }, "sirius::Density::update": { - "avg": 0.073189, + "avg": 0.001737, "count": 1, - "max": 0.073189, - "min": 0.073189, - "total": 0.073189 + "max": 0.001737, + "min": 0.001737, + "total": 0.001737 }, "sirius::Field4D::symmetrize": { - "avg": 0.019320025, + "avg": 0.0122516, "count": 40, - "max": 0.021985, - "min": 0.017187, - "total": 0.7728010000000001 + "max": 0.016394, + "min": 0.010522, + "total": 0.490064 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.003368, + "count": 1, + "max": 0.003368, + "min": 0.003368, + "total": 0.003368 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.002088, + "count": 1, + "max": 0.002088, + "min": 0.002088, + "total": 0.002088 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.022064, + "count": 1, + "max": 0.022064, + "min": 0.022064, + "total": 0.022064 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.001585, + "count": 1, + "max": 0.001585, + "min": 0.001585, + "total": 0.001585 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.031139, + "count": 1, + "max": 0.031139, + "min": 0.031139, + "total": 0.031139 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.001769, + "count": 1, + "max": 0.001769, + "min": 0.001769, + "total": 0.001769 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.05185533846153849, + "avg": 
0.021570807692307705, "count": 520, - "max": 0.089223, - "min": 0.002469, - "total": 26.964776000000015 + "max": 0.038255, + "min": 0.000978, + "total": 11.216820000000007 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.0018637250000000005, + "avg": 0.0015415125, "count": 80, - "max": 0.002427, - "min": 0.00171, - "total": 0.14909800000000004 + "max": 0.002407, + "min": 0.001263, + "total": 0.123321 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.00093925, + "avg": 0.0007613750000000002, "count": 80, - "max": 0.001567, - "min": 0.000852, - "total": 0.07514 - }, - "sirius::Hamiltonian::prepare": { - "avg": 2.1952380952380954e-05, - "count": 21, - "max": 3e-05, - "min": 1.9e-05, - "total": 0.00046100000000000004 + "max": 0.001229, + "min": 0.000627, + "total": 0.06091000000000002 }, "sirius::K_point::K_point": { - "avg": 1.2499999999999999e-06, + "avg": 1.4999999999999998e-06, "count": 4, "max": 4e-06, "min": 0.0, - "total": 4.9999999999999996e-06 + "total": 5.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.00076975, + "avg": 0.00051325, "count": 4, - "max": 0.000847, - "min": 0.000709, - "total": 0.003079 + "max": 0.000556, + "min": 0.000478, + "total": 0.002053 }, "sirius::K_point::initialize": { - "avg": 0.00227875, + "avg": 0.0021885, "count": 4, - "max": 0.002623, - "min": 0.001876, - "total": 0.009115 + "max": 0.002358, + "min": 0.001964, + "total": 0.008754 }, "sirius::K_point::update": { - "avg": 0.00104325, + "avg": 0.0016424999999999999, "count": 4, - "max": 0.001149, - "min": 0.000899, - "total": 0.004173 + "max": 0.001844, + "min": 0.001457, + "total": 0.0065699999999999995 }, "sirius::K_point_set::add_kpoint": { - "avg": 6.2499999999999995e-06, + "avg": 5.750000000000001e-06, "count": 4, - "max": 1.7e-05, + "max": 1.6e-05, "min": 2e-06, - "total": 2.4999999999999998e-05 + "total": 2.3000000000000003e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.031544, + "avg": 0.061044, "count": 1, - "max": 0.031544, - "min": 
0.031544, - "total": 0.031544 + "max": 0.061044, + "min": 0.061044, + "total": 0.061044 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.00045329999999999996, + "avg": 0.0005899500000000001, "count": 20, - "max": 0.000604, - "min": 0.000298, - "total": 0.009066 + "max": 0.000856, + "min": 0.000364, + "total": 0.011799000000000002 }, "sirius::K_point_set::initialize": { - "avg": 0.009527, + "avg": 0.008815, "count": 1, - "max": 0.009527, - "min": 0.009527, - "total": 0.009527 + "max": 0.008815, + "min": 0.008815, + "total": 0.008815 }, "sirius::K_point_set::sync_band_energies": { - "avg": 9.750000000000003e-06, + "avg": 6.9499999999999995e-06, "count": 20, - "max": 2.9e-05, - "min": 8e-06, - "total": 0.00019500000000000005 + "max": 1.7e-05, + "min": 5e-06, + "total": 0.000139 }, "sirius::Local_operator::apply_h": { - "avg": 0.05052137500000001, + "avg": 0.020759903846153838, "count": 520, - "max": 0.08777, - "min": 0.001946, - "total": 26.271115000000005 + "max": 0.037144, + "min": 0.00073, + "total": 10.795149999999996 }, "sirius::Local_operator::prepare": { - "avg": 0.0007763047619047616, + "avg": 0.00028980952380952383, "count": 105, - "max": 0.005937, - "min": 2.2e-05, - "total": 0.08151199999999997 + "max": 0.00189, + "min": 2.3e-05, + "total": 0.030430000000000002 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 9.285714285714285e-07, + "avg": 1.214285714285714e-06, "count": 42, "max": 2e-06, "min": 0.0, - "total": 3.9e-05 + "total": 5.0999999999999986e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.0003218019230769229, + "avg": 0.0002071009615384617, "count": 1040, - "max": 0.00071, - "min": 7.7e-05, - "total": 0.33467399999999986 + "max": 0.000721, + "min": 7.8e-05, + "total": 0.21538500000000016 }, "sirius::Periodic_function::add": { - "avg": 0.00018257142857142861, + "avg": 0.00012685714285714283, "count": 42, - "max": 0.000362, - "min": 0.000104, - "total": 0.007668000000000002 + "max": 0.000218, + "min": 9.1e-05, + 
"total": 0.005327999999999999 }, "sirius::Periodic_function::inner": { - "avg": 0.00013369199999999993, - "count": 250, - "max": 0.000344, - "min": 8.6e-05, - "total": 0.03342299999999998 + "avg": 0.00013849603174603174, + "count": 252, + "max": 0.000263, + "min": 9.5e-05, + "total": 0.034901 }, "sirius::Periodic_function::integrate": { - "avg": 8.421951219512194e-05, + "avg": 0.00010419512195121952, "count": 41, - "max": 0.000153, - "min": 7.4e-05, - "total": 0.0034529999999999995 + "max": 0.000178, + "min": 8.4e-05, + "total": 0.004272000000000001 }, "sirius::Potential::Potential": { - "avg": 0.078562, + "avg": 0.014179, "count": 1, - "max": 0.078562, - "min": 0.078562, - "total": 0.078562 + "max": 0.014179, + "min": 0.014179, + "total": 0.014179 }, "sirius::Potential::generate": { - "avg": 0.9067703333333332, + "avg": 0.04025247619047619, "count": 21, - "max": 0.961944, - "min": 0.735199, - "total": 19.042177 + "max": 0.043441, + "min": 0.035459, + "total": 0.8453020000000001 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.796443238095238, + "avg": 0.01146247619047619, "count": 21, - "max": 0.852331, - "min": 0.619945, - "total": 16.725308 + "max": 0.013108, + "min": 0.010442, + "total": 0.24071199999999998 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 5.714285714285715e-07, + "avg": 6.666666666666668e-07, "count": 21, "max": 1e-06, "min": 0.0, - "total": 1.2000000000000002e-05 + "total": 1.4000000000000003e-05 }, "sirius::Potential::generate_local_potential": { - "avg": 0.075332, + "avg": 0.00405, "count": 1, - "max": 0.075332, - "min": 0.075332, - "total": 0.075332 + "max": 0.00405, + "min": 0.00405, + "total": 0.00405 }, "sirius::Potential::poisson": { - "avg": 0.07261076190476191, + "avg": 0.00113652380952381, "count": 21, - "max": 0.074774, - "min": 0.070249, - "total": 1.5248260000000002 + "max": 0.001366, + "min": 0.000993, + "total": 0.02386700000000001 }, "sirius::Potential::update": { - "avg": 0.076388, + "avg": 
0.004111, "count": 1, - "max": 0.076388, - "min": 0.076388, - "total": 0.076388 + "max": 0.004111, + "min": 0.004111, + "total": 0.004111 }, "sirius::Potential::xc": { - "avg": 0.026528000000000003, + "avg": 0.025180285714285714, "count": 21, - "max": 0.029062, - "min": 0.024882, - "total": 0.557088 + "max": 0.027473, + "min": 0.021718, + "total": 0.528786 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.02652009523809524, + "avg": 0.025170476190476195, "count": 21, - "max": 0.029054, - "min": 0.024876, - "total": 0.556922 + "max": 0.027459, + "min": 0.021709, + "total": 0.52858 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.017296047619047617, + "avg": 0.009083047619047619, "count": 21, - "max": 0.019305, - "min": 0.01559, - "total": 0.36321699999999996 + "max": 0.010391, + "min": 0.005821, + "total": 0.190744 }, "sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.0035709523809523817, + "avg": 0.0013359047619047621, "count": 21, - "max": 0.004583, - "min": 0.003257, - "total": 0.07499000000000001 + "max": 0.001475, + "min": 0.001228, + "total": 0.028054000000000003 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.122726, + "avg": 0.12332, "count": 2, - "max": 0.123817, - "min": 0.121635, - "total": 0.245452 + "max": 0.124081, + "min": 0.122559, + "total": 0.24664 }, "sirius::Radial_integrals|aug": { - "avg": 0.686804, + "avg": 0.7006985, "count": 2, - "max": 0.76553, - "min": 0.608078, - "total": 1.373608 + "max": 0.775028, + "min": 0.626369, + "total": 1.401397 }, "sirius::Radial_integrals|beta": { - "avg": 0.15758, + "avg": 0.152349, "count": 2, - "max": 0.15917, - "min": 0.15599, - "total": 0.31516 + "max": 0.155474, + "min": 0.149224, + "total": 0.304698 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.0305265, + "avg": 0.030308500000000002, "count": 2, - "max": 0.033271, - "min": 0.027782, - "total": 0.061053 + "max": 0.032701, + "min": 0.027916, + "total": 0.060617000000000004 }, 
"sirius::Radial_integrals|rho_pseudo": { - "avg": 0.027067, + "avg": 0.031391, "count": 1, - "max": 0.027067, - "min": 0.027067, - "total": 0.027067 + "max": 0.031391, + "min": 0.031391, + "total": 0.031391 }, "sirius::Radial_integrals|vloc": { - "avg": 0.1005925, + "avg": 0.104434, "count": 2, - "max": 0.106799, - "min": 0.094386, - "total": 0.201185 + "max": 0.112458, + "min": 0.09641, + "total": 0.208868 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.004508, + "avg": 0.004309, "count": 1, - "max": 0.004508, - "min": 0.004508, - "total": 0.004508 + "max": 0.004309, + "min": 0.004309, + "total": 0.004309 }, "sirius::Simulation_context::init_comm": { - "avg": 0.005078, + "avg": 0.000261, "count": 1, - "max": 0.005078, - "min": 0.005078, - "total": 0.005078 + "max": 0.000261, + "min": 0.000261, + "total": 0.000261 }, "sirius::Simulation_context::init_fft": { - "avg": 0.046758, + "avg": 0.018336, "count": 1, - "max": 0.046758, - "min": 0.046758, - "total": 0.046758 + "max": 0.018336, + "min": 0.018336, + "total": 0.018336 }, "sirius::Simulation_context::initialize": { - "avg": 2.599002, + "avg": 2.586844, "count": 1, - "max": 2.599002, - "min": 2.599002, - "total": 2.599002 + "max": 2.586844, + "min": 2.586844, + "total": 2.586844 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.067222, - "count": 3, - "max": 0.067671, - "min": 0.066536, - "total": 0.201666 + "avg": 0.0004158333333333334, + "count": 6, + "max": 0.000704, + "min": 0.00022, + "total": 0.0024950000000000003 }, "sirius::Simulation_context::update": { - "avg": 0.253902, + "avg": 0.222031, "count": 1, - "max": 0.253902, - "min": 0.253902, - "total": 0.253902 + "max": 0.222031, + "min": 0.222031, + "total": 0.222031 }, "sirius::Simulation_parameters::import": { - "avg": 0.000932, + "avg": 0.000218, "count": 1, - "max": 0.000932, - "min": 0.000932, - "total": 0.000932 + "max": 0.000218, + "min": 0.000218, + "total": 0.000218 }, 
"sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.00376630434782609, - "count": 230, - "max": 0.00762, - "min": 0.001486, - "total": 0.8662500000000006 + "avg": 0.0007780506329113924, + "count": 237, + "max": 0.003242, + "min": 0.000304, + "total": 0.184398 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 6.525e-05, + "avg": 3.7000000000000005e-05, "count": 4, - "max": 8.3e-05, - "min": 5.2e-05, - "total": 0.000261 + "max": 4.4e-05, + "min": 3.1e-05, + "total": 0.00014800000000000002 }, "sirius::Smooth_periodic_function|inner": { - "avg": 0.00012713141025641033, - "count": 312, - "max": 0.000324, - "min": 8.4e-05, - "total": 0.03966500000000003 + "avg": 0.00013506349206349218, + "count": 315, + "max": 0.000257, + "min": 9.2e-05, + "total": 0.042545000000000034 + }, + "sirius::Stress|ewald": { + "avg": 0.001568, + "count": 1, + "max": 0.001568, + "min": 0.001568, + "total": 0.001568 + }, + "sirius::Stress|har": { + "avg": 0.000459, + "count": 1, + "max": 0.000459, + "min": 0.000459, + "total": 0.000459 + }, + "sirius::Stress|kin": { + "avg": 0.00177, + "count": 1, + "max": 0.00177, + "min": 0.00177, + "total": 0.00177 + }, + "sirius::Stress|nonloc": { + "avg": 0.068329, + "count": 1, + "max": 0.068329, + "min": 0.068329, + "total": 0.068329 + }, + "sirius::Stress|us": { + "avg": 0.781283, + "count": 1, + "max": 0.781283, + "min": 0.781283, + "total": 0.781283 + }, + "sirius::Stress|us|gemm": { + "avg": 0.001841444444444445, + "count": 36, + "max": 0.002188, + "min": 0.001712, + "total": 0.06629200000000002 + }, + "sirius::Stress|us|phase_fac": { + "avg": 0.00014, + "count": 2, + "max": 0.000145, + "min": 0.000135, + "total": 0.00028 + }, + "sirius::Stress|us|prepare": { + "avg": 0.00016363888888888892, + "count": 36, + "max": 0.000285, + "min": 0.00013, + "total": 0.005891000000000001 + }, + "sirius::Stress|vloc": { + "avg": 0.001372, + "count": 1, + "max": 0.001372, + "min": 0.001372, + "total": 0.001372 }, 
"sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.00039899999999999994, + "avg": 0.000413, "count": 2, - "max": 0.000547, - "min": 0.000251, - "total": 0.0007979999999999999 + "max": 0.00058, + "min": 0.000246, + "total": 0.000826 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.024004, + "avg": 0.055816, "count": 2, - "max": 0.026567, - "min": 0.021441, - "total": 0.048008 + "max": 0.056738, + "min": 0.054894, + "total": 0.111632 }, "sirius::Unit_cell::initialize": { - "avg": 0.065404, + "avg": 0.091132, "count": 1, - "max": 0.065404, - "min": 0.065404, - "total": 0.065404 + "max": 0.091132, + "min": 0.091132, + "total": 0.091132 }, "sirius::Unit_cell::update": { - "avg": 0.024413999999999998, + "avg": 0.056248, "count": 2, - "max": 0.027127, - "min": 0.021701, - "total": 0.048827999999999996 + "max": 0.05701, + "min": 0.055486, + "total": 0.112496 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.023966, + "avg": 0.055769, "count": 2, - "max": 0.02654, - "min": 0.021392, - "total": 0.047932 + "max": 0.056685, + "min": 0.054853, + "total": 0.111538 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.023767999999999997, + "avg": 0.0555455, "count": 2, - "max": 0.026303, - "min": 0.021233, - "total": 0.047535999999999995 + "max": 0.056503, + "min": 0.054588, + "total": 0.111091 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 3.35e-05, + "avg": 3.2e-05, "count": 2, - "max": 5.4e-05, - "min": 1.3e-05, - "total": 6.7e-05 + "max": 5e-05, + "min": 1.4e-05, + "total": 6.4e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.00012099999999999999, + "avg": 0.0001495, "count": 2, - "max": 0.00013, - "min": 0.000112, - "total": 0.00024199999999999997 + "max": 0.000181, + "min": 0.000118, + "total": 0.000299 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 3.25e-05, + "avg": 2.55e-05, "count": 2, - "max": 5.6e-05, - "min": 9e-06, - "total": 6.5e-05 + "max": 3e-05, + 
"min": 2.1e-05, + "total": 5.1e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.009641825000000003, + "avg": 0.006157050000000001, "count": 40, - "max": 0.011461, - "min": 0.008633, - "total": 0.3856730000000001 + "max": 0.008777, + "min": 0.005258, + "total": 0.24628200000000003 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.005626175000000001, + "avg": 0.0054430749999999995, "count": 40, - "max": 0.006926, - "min": 0.004864, - "total": 0.22504700000000002 + "max": 0.007607, + "min": 0.004603, + "total": 0.21772299999999997 }, "sirius::Unit_cell_symmetry::symmetrize_vector_function_pw_1c": { - "avg": 0.009667000000000002, + "avg": 0.0060857, "count": 40, - "max": 0.012168, - "min": 0.008425, - "total": 0.3866800000000001 + "max": 0.007968, + "min": 0.005253, + "total": 0.243428 } } } \ No newline at end of file diff --git a/verification/test5/sirius.json b/verification/test5/sirius.json index c946519a4..ed64ff071 100644 --- a/verification/test5/sirius.json +++ b/verification/test5/sirius.json @@ -4,7 +4,9 @@ "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", "gen_evp_solver_type" : "lapack", - "verbosity" : 1 + "verbosity" : 1, + "print_forces" : true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test6/output_ref.json b/verification/test6/output_ref.json index 1032e5c16..41c985144 100644 --- a/verification/test6/output_ref.json +++ b/verification/test6/output_ref.json @@ -1,890 +1,1052 @@ { - "build_date": "Mon, 8 Oct 2018 10:57:13", "comm_world_size": 1, "counters": { - "band_evp_work_count": 3495.5269685935373, - "local_operator_num_applied": 9377 + "band_evp_work_count": 3495.8551433773337, + "local_operator_num_applied": 9379 }, - "git_hash": "42583bb550104a0c716f903542cfd02c12f9ce3d", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "Fe2", "converged": true, "core_leakage": 0.0, - "efermi": 
0.5911535356360933, + "efermi": 0.5911535287250752, "energy": { - "bxc": -0.37423267123623255, + "bxc": -0.3742326470057194, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -18.280510795624913, + "eval_sum": -18.280511397084677, "ewald": -168.11855617908088, - "exc": -28.85686239167108, - "kin": 76.27464728743095, - "total": -250.2232678193587, - "veff": -94.18092541181964, - "vha": 139.86300088486348, - "vxc": -34.58992931821368 + "exc": -28.85686274712448, + "kin": 76.27464715141691, + "total": -250.2232682099268, + "veff": -94.18092590149587, + "vha": 139.86300058270862, + "vxc": -34.58992975771185 }, "fft_coarse_grid": [24,24,24], - "fft_grid": [45,45,45], + "fft_grid": [48,48,48], + "forces": [ + [-2.6533758004723403e-14,1.7850261812024258e-14,-4.180917362621992e-15], + [-6.3677761646075806e-15,3.802935113300676e-14,1.5957628492689695e-14] + ], "mpi_grid": [1,1], "num_atoms": 2, "num_bands": 26, "num_fv_states": -1, "num_scf_iterations": 20, "omega": 170.17770432671716, - "pw_cutoff": 25.0 + "pw_cutoff": 25.0, + "stress": [ + [-0.0020818522952572756,1.0030885127016853e-35,4.9274717009444885e-32], + [1.0030885127016853e-35,-0.002081852295257279,-4.108851165728643e-32], + [4.9274717009444885e-32,-4.108851165728643e-32,-0.002081852295257168] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.0008352418772563173, + "Eigensolver_lapack|zheevr": { + "avg": 0.0006479999999999997, "count": 277, - "max": 0.001602, - "min": 0.000265, - "total": 0.2313619999999999 + "max": 0.001311, + "min": 0.000242, + "total": 0.1794959999999999 }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.0008276895306859209, - "count": 277, - "max": 0.001592, - "min": 0.000259, - "total": 0.22927000000000008 + "Eigensolver_lapack|zhegvx": { + "avg": 0.0003672329545454546, + "count": 176, + "max": 0.000685, + "min": 0.000239, + "total": 0.06463300000000001 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0065580000000000005, + "avg": 0.0081135, 
"count": 2, - "max": 0.008089, - "min": 0.005027, - "total": 0.013116000000000001 + "max": 0.009013, + "min": 0.007214, + "total": 0.016227 }, "sddk::FFT3D::prepare": { - "avg": 4.624074074074078e-05, + "avg": 6.22175925925926e-05, "count": 216, - "max": 0.000192, - "min": 3.8e-05, - "total": 0.009988000000000007 + "max": 0.000139, + "min": 4.8e-05, + "total": 0.013439000000000001 }, "sddk::FFT3D::prepare|cpu": { - "avg": 4.3689814814814824e-05, + "avg": 5.72777777777778e-05, "count": 216, - "max": 0.000188, - "min": 3.6e-05, - "total": 0.009437000000000003 + "max": 0.000104, + "min": 4.4e-05, + "total": 0.012372000000000005 }, "sddk::FFT3D::transform": { - "avg": 0.0005434694630721443, - "count": 22219, - "max": 0.006057, - "min": 0.000225, - "total": 12.075347999999975 + "avg": 0.00016667728295096773, + "count": 22230, + "max": 0.00375, + "min": 0.000112, + "total": 3.7052360000000126 }, "sddk::FFT3D::transform_xy": { - "avg": 0.00011410112966380404, - "count": 22219, - "max": 0.001465, - "min": 9.7e-05, - "total": 2.535213000000062 + "avg": 7.924547908232191e-05, + "count": 22230, + "max": 0.00365, + "min": 5.1e-05, + "total": 1.761627000000016 }, "sddk::FFT3D::transform_z": { - "avg": 0.0004242826409829466, - "count": 22219, - "max": 0.005694, - "min": 0.000106, - "total": 9.427136000000091 + "avg": 8.144930274403982e-05, + "count": 22230, + "max": 0.000705, + "min": 4.1e-05, + "total": 1.8106180000000054 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.00042184085692425507, - "count": 22219, - "max": 0.005691, - "min": 0.000104, - "total": 9.372882000000024 + "avg": 7.891659919028421e-05, + "count": 22230, + "max": 0.000702, + "min": 3.8e-05, + "total": 1.754316000000018 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0004193088797875733, - "count": 22219, - "max": 0.005686, - "min": 0.000102, - "total": 9.316624000000091 + "avg": 7.603648223122013e-05, + "count": 22230, + "max": 0.000695, + "min": 3.6e-05, + "total": 1.6902910000000235 }, 
"sddk::Gvec::find_gvec_shells": { - "avg": 0.0005468333333333334, + "avg": 0.0005636666666666666, "count": 12, - "max": 0.002524, - "min": 0.000149, - "total": 0.006562000000000001 + "max": 0.002449, + "min": 0.000145, + "total": 0.006764 }, "sddk::Gvec::init": { - "avg": 0.0020806666666666664, + "avg": 0.001495, "count": 6, - "max": 0.00941, - "min": 0.000449, - "total": 0.012483999999999999 + "max": 0.006694, + "min": 0.000315, + "total": 0.00897 }, - "sddk::Wave_functions::inner": { - "avg": 0.00020473457311918761, + "sddk::inner": { + "avg": 0.00010430515638207919, "count": 1183, - "max": 0.000478, - "min": 3e-06, - "total": 0.24220099999999894 - }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.000794028880866425, - "count": 277, - "max": 0.001504, - "min": 0.000133, - "total": 0.2199459999999997 - }, - "sddk::Wave_functions::transform": { - "avg": 0.0002253310901749664, - "count": 743, - "max": 0.000774, - "min": 6.3e-05, - "total": 0.16742100000000004 + "max": 0.000371, + "min": 4e-06, + "total": 0.12339299999999968 }, - "sddk::Wave_functions::transform|init": { - "avg": 2.0134589502018808e-05, - "count": 743, - "max": 0.000268, - "min": 1e-06, - "total": 0.014959999999999975 + "sddk::inner|local": { + "avg": 0.00010120879120879161, + "count": 1183, + "max": 0.000364, + "min": 2e-06, + "total": 0.11973000000000047 }, "sddk::matrix_storage::matrix_storage": { - "avg": 3.821969696969681e-06, + "avg": 1.0738636363636454e-06, "count": 528, - "max": 0.000155, + "max": 2.1e-05, "min": 0.0, - "total": 0.0020179999999999916 + "total": 0.0005670000000000048 }, "sddk::matrix_storage::remap_backward": { - "avg": 7.019867549668902e-07, + "avg": 1.0088300220750639e-06, "count": 453, - "max": 5e-06, + "max": 2e-05, "min": 0.0, - "total": 0.0003180000000000012 + "total": 0.00045700000000000395 }, "sddk::matrix_storage::remap_forward": { - "avg": 1.7359098228663606e-06, + "avg": 4.3462157809984046e-06, "count": 621, - "max": 1.6e-05, + "max": 2.4e-05, + "min": 1e-06, 
+ "total": 0.002699000000000009 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 1.007448789571704e-06, + "count": 1074, + "max": 2e-05, "min": 0.0, - "total": 0.00107800000000001 + "total": 0.00108200000000001 + }, + "sddk::orthogonalize": { + "avg": 0.000435187725631769, + "count": 277, + "max": 0.001017, + "min": 0.000106, + "total": 0.12054700000000002 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 2.4805054151624554e-05, + "count": 277, + "max": 0.000108, + "min": 1e-06, + "total": 0.006871000000000001 + }, + "sddk::orthogonalize|transform": { + "avg": 5.459566787003609e-05, + "count": 277, + "max": 0.000163, + "min": 6e-06, + "total": 0.015122999999999998 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.008981, + "avg": 0.005404, "count": 1, - "max": 0.008981, - "min": 0.008981, - "total": 0.008981 + "max": 0.005404, + "min": 0.005404, + "total": 0.005404 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.0024284047619047614, + "avg": 0.00040352380952380946, "count": 84, - "max": 0.003141, - "min": 0.002324, - "total": 0.20398599999999995 + "max": 0.000831, + "min": 0.000369, + "total": 0.033895999999999996 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.002507238095238095, + "avg": 0.0004360952380952381, "count": 84, - "max": 0.004536, - "min": 0.002346, - "total": 0.21060799999999996 + "max": 0.000817, + "min": 0.00039, + "total": 0.036632 + }, + "sddk::transform": { + "avg": 0.00013183983849259756, + "count": 743, + "max": 0.000496, + "min": 4e-05, + "total": 0.09795699999999999 + }, + "sddk::transform|init": { + "avg": 2.137819650067293e-05, + "count": 743, + "max": 0.000239, + "min": 1e-06, + "total": 0.015883999999999985 + }, + "sddk::transform|local": { + "avg": 4.8649653434152484e-05, + "count": 1587, + "max": 0.000201, + "min": 1e-05, + "total": 0.077207 }, "sirius::Atom_type::init": { - "avg": 0.022388, + "avg": 0.02236, "count": 1, - "max": 0.022388, - "min": 0.022388, - "total": 0.022388 + "max": 0.02236, + "min": 
0.02236, + "total": 0.02236 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.099544, + "avg": 0.107548, "count": 1, - "max": 0.099544, - "min": 0.099544, - "total": 0.099544 + "max": 0.107548, + "min": 0.107548, + "total": 0.107548 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.08588683333333334, + "count": 6, + "max": 0.091062, + "min": 0.082155, + "total": 0.515321 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.0783845, + "count": 6, + "max": 0.082133, + "min": 0.074063, + "total": 0.470307 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.063434, + "count": 1, + "max": 0.063434, + "min": 0.063434, + "total": 0.063434 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.1377175952380952, + "avg": 0.05997697619047616, "count": 84, - "max": 0.211703, - "min": 0.063042, - "total": 11.568277999999998 + "max": 0.093317, + "min": 0.02602, + "total": 5.038065999999997 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 5.867857142857143e-05, + "avg": 2.0380952380952393e-05, "count": 84, - "max": 8.5e-05, - "min": 5.2e-05, - "total": 0.004929 + "max": 4.8e-05, + "min": 1.6e-05, + "total": 0.001712000000000001 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.000640307865168539, + "avg": 0.0005441168539325839, "count": 445, - "max": 0.001605, - "min": 0.000261, - "total": 0.2849369999999999 + "max": 0.001315, + "min": 0.000242, + "total": 0.24213199999999985 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.13636534523809526, + "avg": 0.058963964285714245, "count": 84, - "max": 0.210409, - "min": 0.061604, - "total": 11.454689000000002 + "max": 0.092319, + "min": 0.025097, + "total": 4.9529729999999965 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.00018781547619047621, + "avg": 0.00010051785714285706, "count": 168, - "max": 0.000426, - "min": 0.0001, - 
"total": 0.031553000000000005 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 1.5011904761904754e-05, - "count": 84, - "max": 5.9e-05, - "min": 1.3e-05, - "total": 0.0012609999999999993 + "max": 0.000226, + "min": 5.3e-05, + "total": 0.016886999999999985 }, "sirius::Band::initialize_subspace": { - "avg": 0.538727, + "avg": 0.106534, "count": 1, - "max": 0.538727, - "min": 0.538727, - "total": 0.538727 + "max": 0.106534, + "min": 0.106534, + "total": 0.106534 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.13374075000000002, + "avg": 0.0264575, "count": 4, - "max": 0.143197, - "min": 0.12663, - "total": 0.5349630000000001 + "max": 0.027934, + "min": 0.025401, + "total": 0.10583 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.06895275000000001, + "avg": 0.00044175, "count": 4, - "max": 0.079506, - "min": 0.062636, - "total": 0.27581100000000003 + "max": 0.000538, + "min": 0.000406, + "total": 0.001767 }, "sirius::Band::residuals": { - "avg": 0.0002955191011235958, + "avg": 0.00029982696629213523, "count": 445, - "max": 0.000884, + "max": 0.000669, "min": 0.0, - "total": 0.13150600000000015 + "total": 0.13342300000000018 }, "sirius::Band::residuals_aux": { - "avg": 0.0002460862068965517, + "avg": 0.00033686206896551754, "count": 290, - "max": 0.000526, - "min": 0.000195, - "total": 0.071365 + "max": 0.000486, + "min": 0.00025, + "total": 0.09769000000000008 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.00032124642289348306, + "avg": 0.00023181240063592986, "count": 629, - "max": 0.000697, - "min": 0.000147, - "total": 0.20206400000000083 + "max": 0.000566, + "min": 0.000101, + "total": 0.14580999999999988 }, "sirius::Band::solve": { - "avg": 0.5532236190476191, + "avg": 0.2409085714285715, "count": 21, - "max": 0.823375, - "min": 0.259538, - "total": 11.617696 + "max": 0.363297, + "min": 0.107125, + "total": 5.059080000000002 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.00040475000000000005, + "avg": 
0.0007245, "count": 4, - "max": 0.000479, - "min": 0.000335, - "total": 0.0016190000000000002 + "max": 0.000753, + "min": 0.000696, + "total": 0.002898 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00039875, + "avg": 0.00040425, "count": 4, - "max": 0.000467, - "min": 0.00033, - "total": 0.001595 + "max": 0.00045, + "min": 0.000344, + "total": 0.001617 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 2.906976744186044e-07, - "count": 172, + "avg": 3.510638297872336e-07, + "count": 188, "max": 1e-06, "min": 0.0, - "total": 4.9999999999999955e-05 + "total": 6.599999999999991e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.00011381750465549355, - "count": 537, - "max": 0.000483, - "min": 0.000101, - "total": 0.061120000000000035 + "avg": 0.00011180769230769228, + "count": 52, + "max": 0.000318, + "min": 9e-05, + "total": 0.005813999999999998 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.00017831239935587757, - "count": 621, - "max": 0.000683, - "min": 2.8e-05, - "total": 0.11073199999999997 + "avg": 0.00011977626193724427, + "count": 733, + "max": 0.000297, + "min": 2.7e-05, + "total": 0.08779600000000005 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 0.00011609959072305596, + "count": 733, + "max": 0.000293, + "min": 2.5e-05, + "total": 0.08510100000000002 }, "sirius::Beta_projectors_base::prepare": { - "avg": 2.8662790697674574e-06, - "count": 172, - "max": 0.000197, - "min": 0.0, - "total": 0.0004930000000000027 + "avg": 1.125e-06, + "count": 8, + "max": 2e-06, + "min": 1e-06, + "total": 9e-06 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.0015689999999999999, + "count": 4, + "max": 0.001927, + "min": 0.001032, + "total": 0.0062759999999999995 }, "sirius::Broyden1::mix": { - "avg": 0.0030785238095238088, + "avg": 0.0018760952380952381, "count": 21, - "max": 0.00476, - "min": 0.00015, - "total": 0.06464899999999998 + "max": 0.002927, + "min": 9e-05, + "total": 
0.039398 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.087369, + "avg": 0.000469, "count": 1, - "max": 0.087369, - "min": 0.087369, - "total": 0.087369 + "max": 0.000469, + "min": 0.000469, + "total": 0.000469 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 32.679964, + "avg": 8.677126, "count": 1, - "max": 32.679964, - "min": 32.679964, - "total": 32.679964 + "max": 8.677126, + "min": 8.677126, + "total": 8.677126 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 1.5540868571428574, + "avg": 0.4128413333333335, "count": 21, - "max": 1.876189, - "min": 1.241563, - "total": 32.63582400000001 + "max": 0.535769, + "min": 0.274566, + "total": 8.669668000000003 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0006943452380952384, + "avg": 0.0005108809523809521, "count": 84, - "max": 0.001301, - "min": 0.00062, - "total": 0.05832500000000002 + "max": 0.000604, + "min": 0.000439, + "total": 0.04291399999999998 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.022075666666666667, + "avg": 0.008870988095238098, "count": 84, - "max": 0.025843, - "min": 0.019946, - "total": 1.8543560000000001 + "max": 0.010613, + "min": 0.007719, + "total": 0.7451630000000002 }, "sirius::Density::augment": { - "avg": 0.1424794285714286, + "avg": 0.059102666666666664, "count": 21, - "max": 0.17512, - "min": 0.136961, - "total": 2.992068 + "max": 0.063403, + "min": 0.057102, + "total": 1.241156 }, "sirius::Density::compute_atomic_mag_mom": { - "avg": 0.000256, + "avg": 0.000184, "count": 1, - "max": 0.000256, - "min": 0.000256, - "total": 0.000256 + "max": 0.000184, + "min": 0.000184, + "total": 0.000184 }, "sirius::Density::generate": { - "avg": 0.23587214285714286, + "avg": 0.09730795238095237, "count": 21, - "max": 0.268042, - "min": 0.229048, - "total": 4.953315 + "max": 0.101481, + "min": 0.093883, + "total": 2.0434669999999997 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.094675, + "avg": 0.001226, "count": 
1, - "max": 0.094675, - "min": 0.094675, - "total": 0.094675 + "max": 0.001226, + "min": 0.001226, + "total": 0.001226 }, "sirius::Density::generate_rho_aug": { - "avg": 0.1420870476190476, + "avg": 0.05890785714285714, "count": 21, - "max": 0.174876, - "min": 0.136659, - "total": 2.983828 + "max": 0.063222, + "min": 0.056891, + "total": 1.2370649999999999 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.019095976190476188, + "avg": 0.022349904761904755, "count": 42, - "max": 0.03654, - "min": 0.005991, - "total": 0.8020309999999999 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.08926528571428573, - "count": 21, - "max": 0.110632, - "min": 0.086429, - "total": 1.8745710000000002 + "max": 0.042483, + "min": 0.004998, + "total": 0.9386959999999998 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.004764928571428573, + "avg": 0.004007428571428571, "count": 42, - "max": 0.007119, - "min": 0.003686, - "total": 0.20012700000000005 + "max": 0.005658, + "min": 0.003133, + "total": 0.16831199999999996 }, "sirius::Density::generate_valence": { - "avg": 0.23586728571428567, + "avg": 0.09730314285714285, "count": 21, - "max": 0.268038, - "min": 0.229042, - "total": 4.953212999999999 + "max": 0.101476, + "min": 0.093878, + "total": 2.043366 }, "sirius::Density::initial_density": { - "avg": 0.100415, + "avg": 0.003767, "count": 1, - "max": 0.100415, - "min": 0.100415, - "total": 0.100415 + "max": 0.003767, + "min": 0.003767, + "total": 0.003767 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.00932152380952381, + "avg": 0.009243428571428573, "count": 21, - "max": 0.013961, - "min": 0.008859, - "total": 0.19575200000000004 + "max": 0.011654, + "min": 0.008684, + "total": 0.194112 }, "sirius::Density::update": { - "avg": 0.095054, + "avg": 0.001263, "count": 1, - "max": 0.095054, - "min": 0.095054, - "total": 0.095054 + "max": 0.001263, + "min": 0.001263, + "total": 0.001263 }, "sirius::Field4D::symmetrize": { - "avg": 
0.02820519047619048, + "avg": 0.018197214285714285, "count": 42, - "max": 0.032753, - "min": 0.025332, - "total": 1.1846180000000002 + "max": 0.025535, + "min": 0.016768, + "total": 0.764283 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.002661, + "count": 1, + "max": 0.002661, + "min": 0.002661, + "total": 0.002661 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.001996, + "count": 1, + "max": 0.001996, + "min": 0.001996, + "total": 0.001996 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.009505, + "count": 1, + "max": 0.009505, + "min": 0.009505, + "total": 0.009505 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.001252, + "count": 1, + "max": 0.001252, + "min": 0.001252, + "total": 0.001252 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.020248, + "count": 1, + "max": 0.020248, + "min": 0.020248, + "total": 0.020248 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.001409, + "count": 1, + "max": 0.001409, + "min": 0.001409, + "total": 0.001409 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.023887565121412782, + "avg": 0.009661174392935979, "count": 453, - "max": 0.035715, - "min": 0.001412, - "total": 10.82106699999999 + "max": 0.017944, + "min": 0.000626, + "total": 4.376511999999998 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.0008294999999999998, + "avg": 0.0006448928571428572, "count": 84, - "max": 0.001233, - "min": 0.000782, - "total": 0.06967799999999999 + "max": 0.000953, + "min": 0.000572, + "total": 0.054171000000000004 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.000417392857142857, + "avg": 0.0003068928571428572, "count": 84, - "max": 0.000593, - "min": 0.000387, - "total": 0.03506099999999999 - }, - "sirius::Hamiltonian::prepare": { - "avg": 1.8727272727272724e-05, - "count": 22, - "max": 5.6e-05, - "min": 1.5e-05, - "total": 0.00041199999999999993 + "max": 0.000471, + "min": 0.000279, + "total": 0.025779000000000003 }, "sirius::K_point::K_point": { - "avg": 1e-06, + "avg": 
1.4999999999999998e-06, "count": 4, - "max": 2e-06, + "max": 4e-06, "min": 0.0, - "total": 4e-06 + "total": 5.999999999999999e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.0005434999999999999, + "avg": 0.00036525, "count": 4, - "max": 0.000568, - "min": 0.00053, - "total": 0.0021739999999999997 + "max": 0.000383, + "min": 0.000331, + "total": 0.001461 }, "sirius::K_point::initialize": { - "avg": 0.00167475, + "avg": 0.0013552500000000001, "count": 4, - "max": 0.001792, - "min": 0.00151, - "total": 0.006699 + "max": 0.001402, + "min": 0.001283, + "total": 0.0054210000000000005 }, "sirius::K_point::update": { - "avg": 0.00080375, + "avg": 0.0009605000000000001, "count": 4, - "max": 0.000912, - "min": 0.000718, - "total": 0.003215 + "max": 0.001003, + "min": 0.000873, + "total": 0.0038420000000000004 }, "sirius::K_point_set::add_kpoint": { - "avg": 5e-06, + "avg": 5.750000000000001e-06, "count": 4, - "max": 1.3e-05, + "max": 1.5e-05, "min": 2e-06, - "total": 2e-05 + "total": 2.3000000000000003e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.011152, + "avg": 0.016361, "count": 1, - "max": 0.011152, - "min": 0.011152, - "total": 0.011152 + "max": 0.016361, + "min": 0.016361, + "total": 0.016361 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.00032852380952380953, + "avg": 0.00043561904761904764, "count": 21, - "max": 0.000411, - "min": 0.0002, - "total": 0.006899000000000001 + "max": 0.000548, + "min": 0.00034, + "total": 0.009148 }, "sirius::K_point_set::initialize": { - "avg": 0.007072, + "avg": 0.005516, "count": 1, - "max": 0.007072, - "min": 0.007072, - "total": 0.007072 + "max": 0.005516, + "min": 0.005516, + "total": 0.005516 }, "sirius::K_point_set::sync_band_energies": { - "avg": 9.285714285714288e-06, + "avg": 5.714285714285714e-06, "count": 21, - "max": 1.7e-05, - "min": 8e-06, - "total": 0.00019500000000000002 + "max": 1.2e-05, + "min": 4e-06, + "total": 0.00011999999999999999 }, "sirius::Local_operator::apply_h": { - "avg": 
0.023259295805739513, + "avg": 0.009281467991169978, "count": 453, - "max": 0.035017, - "min": 0.001116, - "total": 10.536461 + "max": 0.017347, + "min": 0.000437, + "total": 4.204505 }, "sirius::Local_operator::prepare": { - "avg": 0.00043125454545454584, + "avg": 0.00013174545454545448, "count": 110, - "max": 0.00366, - "min": 1.5e-05, - "total": 0.04743800000000004 + "max": 0.000664, + "min": 1.6e-05, + "total": 0.014491999999999993 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 9.090909090909087e-07, + "avg": 1.4318181818181817e-06, "count": 44, - "max": 2e-06, + "max": 8e-06, "min": 0.0, - "total": 3.999999999999998e-05 + "total": 6.3e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.00014943046357615892, + "avg": 0.0001135728476821192, "count": 906, - "max": 0.000353, - "min": 6.6e-05, - "total": 0.13538399999999998 + "max": 0.000274, + "min": 6.2e-05, + "total": 0.102897 }, "sirius::Periodic_function::add": { - "avg": 0.00012220454545454546, + "avg": 8.186363636363637e-05, "count": 44, - "max": 0.000228, - "min": 5.7e-05, - "total": 0.005377000000000001 + "max": 0.000175, + "min": 5.8e-05, + "total": 0.003602 }, "sirius::Periodic_function::inner": { - "avg": 7.345419847328245e-05, - "count": 262, - "max": 0.000232, - "min": 5.1e-05, - "total": 0.019245 + "avg": 8.19507575757576e-05, + "count": 264, + "max": 0.000167, + "min": 6e-05, + "total": 0.02163500000000001 }, "sirius::Periodic_function::integrate": { - "avg": 5.6767441860465115e-05, + "avg": 7.451162790697673e-05, "count": 43, - "max": 0.000108, - "min": 5.1e-05, - "total": 0.002441 + "max": 0.000124, + "min": 6.1e-05, + "total": 0.0032039999999999994 }, "sirius::Potential::Potential": { - "avg": 0.101233, + "avg": 0.009181, "count": 1, - "max": 0.101233, - "min": 0.101233, - "total": 0.101233 + "max": 0.009181, + "min": 0.009181, + "total": 0.009181 }, "sirius::Potential::generate": { - "avg": 0.6626453181818182, + "avg": 0.022658909090909087, "count": 22, - "max": 0.694008, - 
"min": 0.350729, - "total": 14.578197 + "max": 0.025939, + "min": 0.020558, + "total": 0.4984959999999999 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.5469468181818182, + "avg": 0.008631227272727275, "count": 22, - "max": 0.578661, - "min": 0.236904, - "total": 12.03283 + "max": 0.009833, + "min": 0.007159, + "total": 0.18988700000000006 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 1.3181818181818184e-06, + "avg": 2.727272727272727e-07, "count": 22, - "max": 1.7e-05, + "max": 1e-06, "min": 0.0, - "total": 2.9000000000000004e-05 + "total": 5.999999999999999e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.098205, + "avg": 0.00278, "count": 1, - "max": 0.098205, - "min": 0.098205, - "total": 0.098205 + "max": 0.00278, + "min": 0.00278, + "total": 0.00278 }, "sirius::Potential::poisson": { - "avg": 0.09567627272727272, + "avg": 0.0007300454545454545, "count": 22, - "max": 0.109989, - "min": 0.092351, - "total": 2.104878 + "max": 0.000931, + "min": 0.000659, + "total": 0.016061 }, "sirius::Potential::update": { - "avg": 0.098578, + "avg": 0.002817, "count": 1, - "max": 0.098578, - "min": 0.098578, - "total": 0.098578 + "max": 0.002817, + "min": 0.002817, + "total": 0.002817 }, "sirius::Potential::xc": { - "avg": 0.010711409090909094, + "avg": 0.01165031818181818, "count": 22, - "max": 0.014465, - "min": 0.008756, - "total": 0.23565100000000008 + "max": 0.013726, + "min": 0.009467, + "total": 0.25630699999999995 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.010705909090909092, + "avg": 0.01164209090909091, "count": 22, - "max": 0.014458, - "min": 0.008749, - "total": 0.23553000000000002 + "max": 0.013717, + "min": 0.009458, + "total": 0.256126 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.007783045454545451, + "avg": 0.006000727272727274, "count": 22, - "max": 0.008849, - "min": 0.00448, - "total": 0.17122699999999993 + "max": 0.007932, + "min": 0.002751, + "total": 0.13201600000000002 }, 
"sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.00046645454545454556, + "avg": 0.0005678181818181818, "count": 22, - "max": 0.001146, - "min": 0.000421, - "total": 0.010262000000000002 + "max": 0.000923, + "min": 0.000502, + "total": 0.012492 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.0893815, + "avg": 0.089611, "count": 2, - "max": 0.090058, - "min": 0.088705, - "total": 0.178763 + "max": 0.09061, + "min": 0.088612, + "total": 0.179222 }, "sirius::Radial_integrals|aug": { - "avg": 0.4679225, + "avg": 0.46867250000000005, "count": 2, - "max": 0.530208, - "min": 0.405637, - "total": 0.935845 + "max": 0.526711, + "min": 0.410634, + "total": 0.9373450000000001 }, "sirius::Radial_integrals|beta": { - "avg": 0.082412, + "avg": 0.0878515, "count": 2, - "max": 0.085265, - "min": 0.079559, - "total": 0.164824 + "max": 0.088297, + "min": 0.087406, + "total": 0.175703 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.0222705, + "avg": 0.0206685, "count": 2, - "max": 0.022796, - "min": 0.021745, - "total": 0.044541 + "max": 0.021783, + "min": 0.019554, + "total": 0.041337 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.021743, + "avg": 0.018608, "count": 1, - "max": 0.021743, - "min": 0.021743, - "total": 0.021743 + "max": 0.018608, + "min": 0.018608, + "total": 0.018608 }, "sirius::Radial_integrals|vloc": { - "avg": 0.0669845, + "avg": 0.06851499999999999, "count": 2, - "max": 0.070152, - "min": 0.063817, - "total": 0.133969 + "max": 0.071501, + "min": 0.065529, + "total": 0.13702999999999999 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.001021, + "avg": 0.00124, "count": 1, - "max": 0.001021, - "min": 0.001021, - "total": 0.001021 + "max": 0.00124, + "min": 0.00124, + "total": 0.00124 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000566, + "avg": 0.000304, "count": 1, - "max": 0.000566, - "min": 0.000566, - "total": 0.000566 + "max": 0.000304, + "min": 0.000304, + "total": 0.000304 }, 
"sirius::Simulation_context::init_fft": { - "avg": 0.035597, + "avg": 0.029874, "count": 1, - "max": 0.035597, - "min": 0.035597, - "total": 0.035597 + "max": 0.029874, + "min": 0.029874, + "total": 0.029874 }, "sirius::Simulation_context::initialize": { - "avg": 1.749906, + "avg": 1.677389, "count": 1, - "max": 1.749906, - "min": 1.749906, - "total": 1.749906 + "max": 1.677389, + "min": 1.677389, + "total": 1.677389 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.090154, - "count": 3, - "max": 0.092468, - "min": 0.08714, - "total": 0.270462 + "avg": 0.0003773333333333334, + "count": 6, + "max": 0.000705, + "min": 0.000184, + "total": 0.0022640000000000004 }, "sirius::Simulation_context::update": { - "avg": 0.204866, + "avg": 0.123634, "count": 1, - "max": 0.204866, - "min": 0.204866, - "total": 0.204866 + "max": 0.123634, + "min": 0.123634, + "total": 0.123634 }, "sirius::Simulation_parameters::import": { - "avg": 0.000369, + "avg": 0.000179, "count": 1, - "max": 0.000369, - "min": 0.000369, - "total": 0.000369 + "max": 0.000179, + "min": 0.000179, + "total": 0.000179 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.00302611203319502, - "count": 241, - "max": 0.00621, - "min": 0.000838, - "total": 0.7292929999999999 + "avg": 0.00044910887096774187, + "count": 248, + "max": 0.00202, + "min": 0.00016, + "total": 0.11137899999999998 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 8.75e-05, + "avg": 4.575e-05, "count": 4, - "max": 0.000136, - "min": 6.2e-05, - "total": 0.00035 + "max": 5.1e-05, + "min": 4.2e-05, + "total": 0.000183 }, "sirius::Smooth_periodic_function|inner": { - "avg": 6.964220183486232e-05, - "count": 327, - "max": 0.000227, - "min": 5e-05, - "total": 0.02277299999999998 + "avg": 7.931818181818184e-05, + "count": 330, + "max": 0.000147, + "min": 5.7e-05, + "total": 0.026175000000000007 + }, + "sirius::Stress|ewald": { + "avg": 0.002164, + "count": 1, + "max": 0.002164, + "min": 0.002164, + 
"total": 0.002164 + }, + "sirius::Stress|har": { + "avg": 0.000803, + "count": 1, + "max": 0.000803, + "min": 0.000803, + "total": 0.000803 + }, + "sirius::Stress|kin": { + "avg": 0.001027, + "count": 1, + "max": 0.001027, + "min": 0.001027, + "total": 0.001027 + }, + "sirius::Stress|nonloc": { + "avg": 0.028142, + "count": 1, + "max": 0.028142, + "min": 0.028142, + "total": 0.028142 + }, + "sirius::Stress|us": { + "avg": 0.636078, + "count": 1, + "max": 0.636078, + "min": 0.636078, + "total": 0.636078 + }, + "sirius::Stress|us|gemm": { + "avg": 0.002452888888888889, + "count": 18, + "max": 0.002809, + "min": 0.002332, + "total": 0.044152000000000004 + }, + "sirius::Stress|us|phase_fac": { + "avg": 0.000167, + "count": 1, + "max": 0.000167, + "min": 0.000167, + "total": 0.000167 + }, + "sirius::Stress|us|prepare": { + "avg": 0.00017755555555555556, + "count": 18, + "max": 0.000292, + "min": 0.000146, + "total": 0.003196 + }, + "sirius::Stress|vloc": { + "avg": 0.001502, + "count": 1, + "max": 0.001502, + "min": 0.001502, + "total": 0.001502 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.0003905, + "avg": 0.0002855, "count": 2, - "max": 0.000572, - "min": 0.000209, - "total": 0.000781 + "max": 0.000442, + "min": 0.000129, + "total": 0.000571 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.004588, + "avg": 0.0102845, "count": 2, - "max": 0.004746, - "min": 0.00443, - "total": 0.009176 + "max": 0.010329, + "min": 0.01024, + "total": 0.020569 }, "sirius::Unit_cell::initialize": { - "avg": 0.027739, + "avg": 0.033165, "count": 1, - "max": 0.027739, - "min": 0.027739, - "total": 0.027739 + "max": 0.033165, + "min": 0.033165, + "total": 0.033165 }, "sirius::Unit_cell::update": { - "avg": 0.004988, + "avg": 0.010578500000000001, "count": 2, - "max": 0.005329, - "min": 0.004647, - "total": 0.009976 + "max": 0.010781, + "min": 0.010376, + "total": 0.021157000000000002 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.0045520000000000005, + 
"avg": 0.010259500000000001, "count": 2, - "max": 0.004728, - "min": 0.004376, - "total": 0.009104000000000001 + "max": 0.010302, + "min": 0.010217, + "total": 0.020519000000000003 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.004252000000000001, + "avg": 0.010023500000000001, "count": 2, - "max": 0.004341, - "min": 0.004163, - "total": 0.008504000000000001 + "max": 0.010083, + "min": 0.009964, + "total": 0.020047000000000002 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 7.6e-05, + "avg": 6.7e-05, "count": 2, - "max": 0.000105, - "min": 4.7e-05, - "total": 0.000152 + "max": 8.6e-05, + "min": 4.8e-05, + "total": 0.000134 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.00015900000000000002, + "avg": 0.00011549999999999999, "count": 2, - "max": 0.000192, - "min": 0.000126, - "total": 0.00031800000000000003 + "max": 0.000129, + "min": 0.000102, + "total": 0.00023099999999999998 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 4.2999999999999995e-05, + "avg": 2.4e-05, "count": 2, - "max": 6.5e-05, - "min": 2.1e-05, - "total": 8.599999999999999e-05 + "max": 3e-05, + "min": 1.8e-05, + "total": 4.8e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.014141999999999998, + "avg": 0.009174880952380952, "count": 42, - "max": 0.018001, - "min": 0.012485, - "total": 0.5939639999999999 + "max": 0.012615, + "min": 0.008294, + "total": 0.385345 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.00884095238095238, + "avg": 0.008232642857142857, "count": 42, - "max": 0.010723, - "min": 0.007598, - "total": 0.37132 + "max": 0.01121, + "min": 0.007431, + "total": 0.345771 }, "sirius::Unit_cell_symmetry::symmetrize_vector_function_pw_1c": { - "avg": 0.014053833333333337, + "avg": 0.00901309523809524, "count": 42, - "max": 0.015606, - "min": 0.012572, - "total": 0.5902610000000001 + "max": 0.012905, + "min": 0.008342, + "total": 0.37855000000000005 } } 
} \ No newline at end of file diff --git a/verification/test6/sirius.json b/verification/test6/sirius.json index 3c9e728f1..636269b8a 100644 --- a/verification/test6/sirius.json +++ b/verification/test6/sirius.json @@ -3,7 +3,9 @@ "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", "gen_evp_solver_type" : "lapack", - "verbosity" : 1 + "verbosity" : 1, + "print_forces" : true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test7/output_ref.json b/verification/test7/output_ref.json index 43f277998..d3fab229a 100644 --- a/verification/test7/output_ref.json +++ b/verification/test7/output_ref.json @@ -1,925 +1,1086 @@ { - "build_date": "Mon, 8 Oct 2018 10:57:13", "comm_world_size": 1, "counters": { - "band_evp_work_count": 11187.194488992598, - "local_operator_num_applied": 17177 + "band_evp_work_count": 11182.241288817646, + "local_operator_num_applied": 17175 }, - "git_hash": "42583bb550104a0c716f903542cfd02c12f9ce3d", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "Ni", "converged": true, "core_leakage": 0.0, - "efermi": 0.6562422015409874, + "efermi": 0.6562422015590461, "energy": { - "bxc": -1.907546560700737e-08, + "bxc": -1.9033743486017663e-08, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -10.663581764475978, + "eval_sum": -10.663581763879625, "ewald": -111.75579413957684, - "exc": -17.773202625138655, - "kin": 48.7984690762543, - "total": -171.62792921689157, - "veff": -59.46205082165481, - "vha": 107.41492591634857, - "vxc": -22.27211225139873 - }, - "fft_coarse_grid": [18,18,18], + "exc": -17.773202625152788, + "kin": 48.79846907641303, + "total": -171.62792921687515, + "veff": -59.46205082125891, + "vha": 107.41492591752277, + "vxc": -22.272112251461746 + }, + "fft_coarse_grid": [20,20,20], "fft_grid": [40,40,40], + "forces": [ + [-1.2022743257142496e-15,-1.9902865387056292e-15,-1.4383064087696335e-15] + ], "mpi_grid": [1,1], "num_atoms": 
1, "num_bands": 19, "num_fv_states": -1, "num_scf_iterations": 24, "omega": 73.39284359469754, - "pw_cutoff": 25.0 + "pw_cutoff": 25.0, + "stress": [ + [-0.0019012642434685784,3.2098832406453934e-35,-4.9692599572251803e-20], + [3.2098832406453934e-35,-0.0019012642434686478,-4.5175090520228746e-20], + [-4.9692599572251803e-20,-4.5175090520228746e-20,-0.0019012642434688906] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.0005228887573964493, - "count": 845, - "max": 0.001231, - "min": 0.000185, - "total": 0.44184099999999965 - }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.0005163112426035498, - "count": 845, - "max": 0.00122, - "min": 0.000181, - "total": 0.4362829999999996 + "Eigensolver_lapack|zheevr": { + "avg": 0.0004412642180094789, + "count": 844, + "max": 0.000905, + "min": 0.000178, + "total": 0.3724270000000002 + }, + "Eigensolver_lapack|zhegvx": { + "avg": 0.00024068509615384626, + "count": 416, + "max": 0.000449, + "min": 0.000186, + "total": 0.10012500000000005 }, "sddk::FFT3D::FFT3D": { - "avg": 0.005303, + "avg": 0.0033599999999999997, "count": 2, - "max": 0.005538, - "min": 0.005068, - "total": 0.010606 + "max": 0.00501, + "min": 0.00171, + "total": 0.006719999999999999 }, "sddk::FFT3D::prepare": { - "avg": 4.500652173913038e-05, + "avg": 6.38239130434783e-05, "count": 460, - "max": 0.000128, - "min": 3.7e-05, - "total": 0.020702999999999975 + "max": 0.000118, + "min": 4.7e-05, + "total": 0.029359000000000017 }, "sddk::FFT3D::prepare|cpu": { - "avg": 4.247173913043474e-05, + "avg": 5.8921739130434734e-05, "count": 460, - "max": 0.000123, - "min": 3.4e-05, - "total": 0.01953699999999998 + "max": 0.000113, + "min": 4.3e-05, + "total": 0.02710399999999998 }, "sddk::FFT3D::transform": { - "avg": 0.0004168877702253867, - "count": 39134, - "max": 0.005309, - "min": 0.000201, - "total": 16.314486000000283 + "avg": 0.00014986147517947757, + "count": 39141, + "max": 0.001157, + "min": 0.000101, + 
"total": 5.865727999999931 }, "sddk::FFT3D::transform_xy": { - "avg": 0.00011027689477180644, - "count": 39134, - "max": 0.001165, - "min": 8.2e-05, - "total": 4.315575999999873 + "avg": 6.736751743695394e-05, + "count": 39141, + "max": 0.000716, + "min": 3.3e-05, + "total": 2.6368319999998144 }, "sddk::FFT3D::transform_z": { - "avg": 0.00030128944140643776, - "count": 39134, - "max": 0.004853, - "min": 9.5e-05, - "total": 11.790660999999536 + "avg": 7.670889859737992e-05, + "count": 39141, + "max": 0.000432, + "min": 4.3e-05, + "total": 3.0024630000000476 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0002988104206061137, - "count": 39134, - "max": 0.004849, - "min": 9.3e-05, - "total": 11.693646999999652 + "avg": 7.423453667509832e-05, + "count": 39141, + "max": 0.000427, + "min": 4e-05, + "total": 2.905614000000023 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0002960377676700479, - "count": 39134, - "max": 0.004826, - "min": 9.1e-05, - "total": 11.585141999999655 + "avg": 7.14774788584868e-05, + "count": 39141, + "max": 0.000408, + "min": 3.8e-05, + "total": 2.797700000000032 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.00022620000000000002, + "avg": 0.00019475, "count": 20, "max": 0.00089, - "min": 9e-05, - "total": 0.004524 + "min": 5.5e-05, + "total": 0.003895 }, "sddk::Gvec::init": { - "avg": 0.0007110999999999999, + "avg": 0.0005293, "count": 10, - "max": 0.004227, - "min": 0.000212, - "total": 0.007110999999999999 - }, - "sddk::Wave_functions::inner": { - "avg": 5.306207306207328e-05, - "count": 3367, - "max": 0.000204, + "max": 0.003254, + "min": 0.000139, + "total": 0.005293 + }, + "sddk::inner": { + "avg": 3.122711058263976e-05, + "count": 3364, + "max": 0.000115, "min": 2e-06, - "total": 0.17866000000000073 - }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.00028510650887573975, - "count": 845, - "max": 0.000744, - "min": 8.8e-05, - "total": 0.24091500000000007 - }, - "sddk::Wave_functions::transform": { - "avg": 
9.544821092278702e-05, - "count": 2124, - "max": 0.000624, - "min": 4.2e-05, - "total": 0.20273199999999963 - }, - "sddk::Wave_functions::transform|init": { - "avg": 6.118644067796615e-06, - "count": 2124, - "max": 0.000149, - "min": 0.0, - "total": 0.012996000000000011 + "total": 0.10504800000000017 + }, + "sddk::inner|local": { + "avg": 2.8460463733650657e-05, + "count": 3364, + "max": 0.000111, + "min": 1e-06, + "total": 0.09574100000000081 }, "sddk::matrix_storage::matrix_storage": { - "avg": 7.556089743589863e-07, + "avg": 1.0216346153846129e-06, "count": 1248, - "max": 2.1e-05, + "max": 2e-05, "min": 0.0, - "total": 0.0009430000000000149 + "total": 0.0012749999999999968 }, "sddk::matrix_storage::remap_backward": { - "avg": 7.272006344171415e-07, - "count": 1261, - "max": 7e-06, + "avg": 7.912698412698532e-07, + "count": 1260, + "max": 1e-05, "min": 0.0, - "total": 0.0009170000000000154 + "total": 0.000997000000000015 }, "sddk::matrix_storage::remap_forward": { - "avg": 1.5815773630343169e-06, - "count": 1661, - "max": 1.9e-05, + "avg": 3.7078313253012057e-06, + "count": 1660, + "max": 2.4e-05, + "min": 1e-06, + "total": 0.006155000000000002 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 8.589041095890409e-07, + "count": 2920, + "max": 2e-05, "min": 0.0, - "total": 0.0026270000000000004 + "total": 0.0025079999999999994 + }, + "sddk::orthogonalize": { + "avg": 0.00016393601895734578, + "count": 844, + "max": 0.000388, + "min": 4.5e-05, + "total": 0.13836199999999985 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 1.5437203791469206e-05, + "count": 844, + "max": 0.000117, + "min": 1e-06, + "total": 0.01302900000000001 + }, + "sddk::orthogonalize|transform": { + "avg": 2.419905213270147e-05, + "count": 844, + "max": 0.000152, + "min": 2e-06, + "total": 0.02042400000000004 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.004084, + "avg": 0.002319, "count": 1, - "max": 0.004084, - "min": 0.004084, - "total": 0.004084 + "max": 0.002319, + "min": 0.002319, 
+ "total": 0.002319 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.0011273099999999996, + "avg": 0.00018694, "count": 100, - "max": 0.001708, - "min": 0.001023, - "total": 0.11273099999999997 + "max": 0.000311, + "min": 0.000163, + "total": 0.018694 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.001111419999999999, + "avg": 0.00018561000000000003, "count": 100, - "max": 0.001513, - "min": 0.001022, - "total": 0.11114199999999991 + "max": 0.000331, + "min": 0.000172, + "total": 0.018561000000000005 + }, + "sddk::transform": { + "avg": 5.066792275082422e-05, + "count": 2123, + "max": 0.000155, + "min": 1.6e-05, + "total": 0.10756799999999982 + }, + "sddk::transform|init": { + "avg": 5.9001413094677505e-06, + "count": 2123, + "max": 4.2e-05, + "min": 0.0, + "total": 0.012526000000000034 + }, + "sddk::transform|local": { + "avg": 1.763136499786019e-05, + "count": 4674, + "max": 9.9e-05, + "min": 4e-06, + "total": 0.08240899999999853 }, "sirius::Atom_type::init": { - "avg": 0.022407, + "avg": 0.024703, "count": 1, - "max": 0.022407, - "min": 0.022407, - "total": 0.022407 + "max": 0.024703, + "min": 0.024703, + "total": 0.024703 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.069794, + "avg": 0.068891, + "count": 1, + "max": 0.068891, + "min": 0.068891, + "total": 0.068891 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.03665316666666667, + "count": 6, + "max": 0.037564, + "min": 0.035653, + "total": 0.219919 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.033537166666666667, + "count": 6, + "max": 0.035632, + "min": 0.031818, + "total": 0.20122299999999999 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.047658, "count": 1, - "max": 0.069794, - "min": 0.069794, - "total": 0.069794 + "max": 0.047658, + "min": 0.047658, + "total": 0.047658 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.07961634, + "avg": 
0.042418975, "count": 200, - "max": 0.180803, - "min": 0.032376, - "total": 15.923267999999998 + "max": 0.079244, + "min": 0.016943, + "total": 8.483794999999999 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 3.547999999999996e-05, + "avg": 2.079499999999998e-05, "count": 200, - "max": 5.4e-05, - "min": 3e-05, - "total": 0.007095999999999991 + "max": 6e-05, + "min": 1.5e-05, + "total": 0.004158999999999996 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.00042642008032128483, - "count": 1245, - "max": 0.001246, - "min": 0.000179, - "total": 0.5308929999999996 + "avg": 0.00037954662379421184, + "count": 1244, + "max": 0.000908, + "min": 0.000181, + "total": 0.4721559999999995 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.07891162500000003, + "avg": 0.041854824999999984, "count": 200, - "max": 0.179415, - "min": 0.031739, - "total": 15.782325000000005 + "max": 0.078659, + "min": 0.016392, + "total": 8.370964999999996 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 7.435000000000005e-05, + "avg": 4.242750000000001e-05, "count": 400, - "max": 0.000216, - "min": 4.6e-05, - "total": 0.02974000000000002 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 1.4209999999999998e-05, - "count": 200, - "max": 5.5e-05, - "min": 1.2e-05, - "total": 0.0028419999999999995 + "max": 0.000103, + "min": 2.5e-05, + "total": 0.016971000000000003 }, "sirius::Band::initialize_subspace": { - "avg": 0.515242, + "avg": 0.13495, "count": 1, - "max": 0.515242, - "min": 0.515242, - "total": 0.515242 + "max": 0.13495, + "min": 0.13495, + "total": 0.13495 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.06400462500000001, + "avg": 0.016798375, "count": 8, - "max": 0.078329, - "min": 0.059496, - "total": 0.5120370000000001 + "max": 0.017468, + "min": 0.016368, + "total": 0.134387 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.02737875, + "avg": 0.000304, "count": 8, - "max": 
0.030188, - "min": 0.025234, - "total": 0.21903 + "max": 0.000354, + "min": 0.000262, + "total": 0.002432 }, "sirius::Band::residuals": { - "avg": 0.00022236144578313203, - "count": 1245, - "max": 0.001044, + "avg": 0.00024817684887459753, + "count": 1244, + "max": 0.000549, "min": 0.0, - "total": 0.27683999999999936 + "total": 0.30873199999999934 }, "sirius::Band::residuals_aux": { - "avg": 0.00022307647740440318, + "avg": 0.00030171958285052125, "count": 863, - "max": 0.000701, - "min": 0.000185, - "total": 0.19251499999999994 + "max": 0.000442, + "min": 0.00024, + "total": 0.26038399999999984 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.00015518783542039385, - "count": 1677, - "max": 0.000608, - "min": 6.5e-05, - "total": 0.2602500000000005 + "avg": 0.0001543084725537001, + "count": 1676, + "max": 0.000331, + "min": 3e-05, + "total": 0.2586210000000014 }, "sirius::Band::solve": { - "avg": 0.6388703600000001, + "avg": 0.3406053599999999, "count": 25, - "max": 1.139033, - "min": 0.286898, - "total": 15.971759000000002 + "max": 0.573948, + "min": 0.153558, + "total": 8.515133999999998 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000246, + "avg": 0.00020349999999999999, "count": 8, - "max": 0.000355, - "min": 0.000196, - "total": 0.001968 + "max": 0.000367, + "min": 0.00013, + "total": 0.0016279999999999999 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00024075, + "avg": 0.00016325, "count": 8, - "max": 0.000349, - "min": 0.000194, - "total": 0.001926 + "max": 0.000289, + "min": 0.000105, + "total": 0.001306 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 3.0147058823529355e-07, - "count": 408, - "max": 1e-06, + "avg": 3.7954545454545376e-07, + "count": 440, + "max": 5e-06, "min": 0.0, - "total": 0.00012299999999999976 + "total": 0.00016699999999999964 }, "sirius::Beta_projectors_base::generate": { - "avg": 2.5440109514031602e-05, - "count": 1461, - "max": 0.00011, - "min": 2.2e-05, - "total": 0.03716800000000017 + 
"avg": 2.4048076923076938e-05, + "count": 104, + "max": 6.8e-05, + "min": 1.9e-05, + "total": 0.0025010000000000015 }, "sirius::Beta_projectors_base::inner": { - "avg": 5.209993979530411e-05, - "count": 1661, - "max": 0.000213, - "min": 9e-06, - "total": 0.08653800000000013 + "avg": 2.7790870488322737e-05, + "count": 1884, + "max": 0.000109, + "min": 8e-06, + "total": 0.052358000000000036 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 2.4581210191082923e-05, + "count": 1884, + "max": 9.1e-05, + "min": 6e-06, + "total": 0.04631100000000023 }, "sirius::Beta_projectors_base::prepare": { - "avg": 7.352941176470608e-07, - "count": 408, - "max": 8e-06, - "min": 0.0, - "total": 0.0003000000000000008 + "avg": 1.1875000000000003e-06, + "count": 16, + "max": 2e-06, + "min": 1e-06, + "total": 1.9000000000000004e-05 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.000682875, + "count": 8, + "max": 0.001131, + "min": 0.000468, + "total": 0.005463 }, "sirius::Broyden1::mix": { - "avg": 0.0033285999999999997, + "avg": 0.0020567199999999997, "count": 25, - "max": 0.005027, - "min": 0.000167, - "total": 0.083215 + "max": 0.002804, + "min": 6.6e-05, + "total": 0.051418 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.038587, + "avg": 0.000281, "count": 1, - "max": 0.038587, - "min": 0.038587, - "total": 0.038587 + "max": 0.000281, + "min": 0.000281, + "total": 0.000281 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 23.742605, + "avg": 11.638609, "count": 1, - "max": 23.742605, - "min": 23.742605, - "total": 23.742605 + "max": 11.638609, + "min": 11.638609, + "total": 11.638609 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 0.94940184, + "avg": 0.4653026000000001, "count": 25, - "max": 1.458038, - "min": 0.591722, - "total": 23.735046 + "max": 0.710698, + "min": 0.278512, + "total": 11.632565000000003 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0003228450000000001, + "avg": 
0.00032254000000000007, "count": 200, - "max": 0.000586, - "min": 0.000288, - "total": 0.06456900000000002 + "max": 0.000469, + "min": 0.000276, + "total": 0.06450800000000001 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.008370039999999997, + "avg": 0.00419498, "count": 200, - "max": 0.011823, - "min": 0.007029, - "total": 1.6740079999999995 + "max": 0.004846, + "min": 0.003606, + "total": 0.838996 }, "sirius::Density::augment": { - "avg": 0.062120319999999986, + "avg": 0.025657999999999993, "count": 25, - "max": 0.070022, - "min": 0.05888, - "total": 1.5530079999999997 + "max": 0.032776, + "min": 0.024499, + "total": 0.6414499999999999 }, "sirius::Density::compute_atomic_mag_mom": { - "avg": 0.000118, + "avg": 0.000124, "count": 1, - "max": 0.000118, - "min": 0.000118, - "total": 0.000118 + "max": 0.000124, + "min": 0.000124, + "total": 0.000124 }, "sirius::Density::generate": { - "avg": 0.13320131999999998, + "avg": 0.062416639999999995, "count": 25, - "max": 0.145818, - "min": 0.125262, - "total": 3.330033 + "max": 0.069655, + "min": 0.058911, + "total": 1.5604159999999998 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.041001, + "avg": 0.000838, "count": 1, - "max": 0.041001, - "min": 0.041001, - "total": 0.041001 + "max": 0.000838, + "min": 0.000838, + "total": 0.000838 }, "sirius::Density::generate_rho_aug": { - "avg": 0.06175660000000001, + "avg": 0.025474880000000005, "count": 25, - "max": 0.069718, - "min": 0.058519, - "total": 1.5439150000000001 + "max": 0.032628, + "min": 0.024347, + "total": 0.6368720000000001 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.00828524, + "avg": 0.009775060000000004, "count": 50, - "max": 0.015369, - "min": 0.002355, - "total": 0.41426199999999996 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.038516159999999994, - "count": 25, - "max": 0.04305, - "min": 0.036981, - "total": 0.9629039999999999 + "max": 0.021897, + "min": 0.001938, + "total": 
0.48875300000000016 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.0021138800000000003, + "avg": 0.0016284000000000003, "count": 50, - "max": 0.003284, - "min": 0.001633, - "total": 0.10569400000000001 + "max": 0.002642, + "min": 0.001368, + "total": 0.08142000000000002 }, "sirius::Density::generate_valence": { - "avg": 0.13319531999999998, + "avg": 0.06241216000000001, "count": 25, - "max": 0.145814, - "min": 0.125258, - "total": 3.3298829999999997 + "max": 0.069648, + "min": 0.058906, + "total": 1.5603040000000001 }, "sirius::Density::initial_density": { - "avg": 0.046667, + "avg": 0.002663, "count": 1, - "max": 0.046667, - "min": 0.046667, - "total": 0.046667 + "max": 0.002663, + "min": 0.002663, + "total": 0.002663 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.00270044, + "avg": 0.0026795200000000003, "count": 25, - "max": 0.003394, - "min": 0.00248, - "total": 0.067511 + "max": 0.003735, + "min": 0.002456, + "total": 0.066988 }, "sirius::Density::update": { - "avg": 0.041294, + "avg": 0.000854, "count": 1, - "max": 0.041294, - "min": 0.041294, - "total": 0.041294 + "max": 0.000854, + "min": 0.000854, + "total": 0.000854 }, "sirius::Field4D::symmetrize": { - "avg": 0.010192, + "avg": 0.00564874, "count": 50, - "max": 0.011781, - "min": 0.009542, - "total": 0.5095999999999999 + "max": 0.006932, + "min": 0.005386, + "total": 0.282437 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.001105, + "count": 1, + "max": 0.001105, + "min": 0.001105, + "total": 0.001105 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.000801, + "count": 1, + "max": 0.000801, + "min": 0.000801, + "total": 0.000801 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.009169, + "count": 1, + "max": 0.009169, + "min": 0.009169, + "total": 0.009169 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.000467, + "count": 1, + "max": 0.000467, + "min": 0.000467, + "total": 0.000467 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.008338, + 
"count": 1, + "max": 0.008338, + "min": 0.008338, + "total": 0.008338 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.000504, + "count": 1, + "max": 0.000504, + "min": 0.000504, + "total": 0.000504 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.011658753370341, - "count": 1261, - "max": 0.026855, - "min": 0.000913, - "total": 14.701688 + "avg": 0.005773881746031742, + "count": 1260, + "max": 0.008837, + "min": 0.000509, + "total": 7.275090999999995 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.0004227, + "avg": 0.0003486199999999999, "count": 200, - "max": 0.000801, - "min": 0.000363, - "total": 0.08453999999999999 + "max": 0.000549, + "min": 0.000298, + "total": 0.06972399999999998 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.0002113800000000002, + "avg": 0.00016742000000000005, "count": 200, - "max": 0.00043, - "min": 0.00018, - "total": 0.042276000000000036 - }, - "sirius::Hamiltonian::prepare": { - "avg": 1.2499999999999999e-05, - "count": 26, - "max": 3.1e-05, - "min": 1.1e-05, - "total": 0.000325 + "max": 0.000262, + "min": 0.000143, + "total": 0.03348400000000001 }, "sirius::K_point::K_point": { - "avg": 6.249999999999999e-07, + "avg": 1e-06, "count": 8, - "max": 2e-06, + "max": 4e-06, "min": 0.0, - "total": 4.9999999999999996e-06 + "total": 8e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.000329125, + "avg": 0.00018975, "count": 8, - "max": 0.000433, - "min": 0.000248, - "total": 0.002633 + "max": 0.000254, + "min": 0.000146, + "total": 0.001518 }, "sirius::K_point::initialize": { - "avg": 0.000827, + "avg": 0.000546875, "count": 8, - "max": 0.001009, - "min": 0.00068, - "total": 0.006616 + "max": 0.000937, + "min": 0.000369, + "total": 0.004375 }, "sirius::K_point::update": { - "avg": 0.00043775, + "avg": 0.0003396249999999999, "count": 8, - "max": 0.000522, - "min": 0.000359, - "total": 0.003502 + "max": 0.000652, + "min": 0.000203, + "total": 0.0027169999999999994 }, "sirius::K_point_set::add_kpoint": { - "avg": 
3.875000000000001e-06, + "avg": 4.125e-06, "count": 8, - "max": 1.5e-05, + "max": 1.7e-05, "min": 2e-06, - "total": 3.100000000000001e-05 + "total": 3.3e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.017607, + "avg": 0.035186, "count": 1, - "max": 0.017607, - "min": 0.017607, - "total": 0.017607 + "max": 0.035186, + "min": 0.035186, + "total": 0.035186 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.00039184, + "avg": 0.0005006, "count": 25, - "max": 0.000583, - "min": 1.2e-05, - "total": 0.009796000000000001 + "max": 0.000706, + "min": 1.3e-05, + "total": 0.012515 }, "sirius::K_point_set::initialize": { - "avg": 0.006918, + "avg": 0.004455, "count": 1, - "max": 0.006918, - "min": 0.006918, - "total": 0.006918 + "max": 0.004455, + "min": 0.004455, + "total": 0.004455 }, "sirius::K_point_set::sync_band_energies": { - "avg": 8.760000000000003e-06, + "avg": 5.64e-06, "count": 25, - "max": 1.5e-05, - "min": 7e-06, - "total": 0.00021900000000000006 + "max": 1.2e-05, + "min": 5e-06, + "total": 0.000141 }, "sirius::Local_operator::apply_h": { - "avg": 0.011390589214908806, - "count": 1261, - "max": 0.026327, - "min": 0.000749, - "total": 14.363533000000004 + "avg": 0.005573008730158723, + "count": 1260, + "max": 0.008557, + "min": 0.000368, + "total": 7.021990999999991 }, "sirius::Local_operator::prepare": { - "avg": 0.00016910683760683784, + "avg": 6.811538461538455e-05, "count": 234, - "max": 0.003035, - "min": 6e-06, - "total": 0.03957100000000006 + "max": 0.000678, + "min": 7e-06, + "total": 0.015938999999999984 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 8.076923076923074e-07, + "avg": 1.0384615384615384e-06, "count": 52, "max": 2e-06, "min": 0.0, - "total": 4.1999999999999984e-05 + "total": 5.4e-05 }, "sirius::Non_local_operator::apply": { - "avg": 8.33203806502777e-05, - "count": 2522, - "max": 0.0003, - "min": 5.3e-05, - "total": 0.21013400000000038 + "avg": 7.683095238095233e-05, + "count": 2520, + "max": 0.000172, + 
"min": 5.1e-05, + "total": 0.1936139999999999 }, "sirius::Periodic_function::add": { - "avg": 7.226923076923076e-05, + "avg": 6.819230769230771e-05, "count": 52, - "max": 0.000197, - "min": 4.5e-05, - "total": 0.0037579999999999996 + "max": 0.000103, + "min": 5e-05, + "total": 0.0035460000000000005 }, "sirius::Periodic_function::inner": { - "avg": 6.004838709677418e-05, - "count": 310, - "max": 0.000144, - "min": 4.7e-05, - "total": 0.018614999999999996 + "avg": 7.20064102564103e-05, + "count": 312, + "max": 0.000155, + "min": 5.6e-05, + "total": 0.022466000000000014 }, "sirius::Periodic_function::integrate": { - "avg": 5.2000000000000004e-05, + "avg": 6.635294117647061e-05, "count": 51, - "max": 0.000121, - "min": 4.6e-05, - "total": 0.0026520000000000003 + "max": 0.000108, + "min": 5.3e-05, + "total": 0.003384000000000001 }, "sirius::Potential::Potential": { - "avg": 0.044242, + "avg": 0.004997, "count": 1, - "max": 0.044242, - "min": 0.044242, - "total": 0.044242 + "max": 0.004997, + "min": 0.004997, + "total": 0.004997 }, "sirius::Potential::generate": { - "avg": 0.1367136923076923, + "avg": 0.04228396153846155, "count": 26, - "max": 0.143564, - "min": 0.132231, - "total": 3.5545560000000003 + "max": 0.046748, + "min": 0.04002, + "total": 1.0993830000000002 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.006594384615384613, + "avg": 0.003243153846153846, "count": 26, - "max": 0.007742, - "min": 0.006127, - "total": 0.17145399999999994 + "max": 0.004545, + "min": 0.003069, + "total": 0.084322 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 1.3846153846153844e-06, + "avg": 5.384615384615386e-07, "count": 26, - "max": 2.6e-05, + "max": 1e-06, "min": 0.0, - "total": 3.5999999999999994e-05 + "total": 1.4000000000000003e-05 }, "sirius::Potential::generate_local_potential": { - "avg": 0.042618, + "avg": 0.001591, "count": 1, - "max": 0.042618, - "min": 0.042618, - "total": 0.042618 + "max": 0.001591, + "min": 0.001591, + "total": 
0.001591 }, "sirius::Potential::poisson": { - "avg": 0.042727999999999995, + "avg": 0.000683846153846154, "count": 26, - "max": 0.047034, - "min": 0.040592, - "total": 1.110928 + "max": 0.000964, + "min": 0.000605, + "total": 0.017780000000000004 }, "sirius::Potential::update": { - "avg": 0.042909, + "avg": 0.001606, "count": 1, - "max": 0.042909, - "min": 0.042909, - "total": 0.042909 + "max": 0.001606, + "min": 0.001606, + "total": 0.001606 }, "sirius::Potential::xc": { - "avg": 0.0816421923076923, + "avg": 0.03730934615384615, "count": 26, - "max": 0.085345, - "min": 0.078956, - "total": 2.122697 + "max": 0.041758, + "min": 0.035266, + "total": 0.970043 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.08163576923076922, + "avg": 0.03730334615384615, "count": 26, - "max": 0.08534, - "min": 0.078952, - "total": 2.12253 + "max": 0.041752, + "min": 0.035262, + "total": 0.9698869999999999 }, "sirius::Potential::xc_rg_magnetic|grad1": { - "avg": 0.030065038461538462, + "avg": 0.009825346153846155, "count": 26, - "max": 0.031987, - "min": 0.028379, - "total": 0.781691 + "max": 0.012451, + "min": 0.009026, + "total": 0.25545900000000005 }, "sirius::Potential::xc_rg_magnetic|grad2": { - "avg": 0.039191076923076926, + "avg": 0.014069923076923079, "count": 26, - "max": 0.041503, - "min": 0.036987, - "total": 1.018968 + "max": 0.018322, + "min": 0.013528, + "total": 0.36581800000000003 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.008865423076923078, + "avg": 0.0049440000000000005, "count": 26, - "max": 0.010198, - "min": 0.007744, - "total": 0.230501 + "max": 0.006017, + "min": 0.003074, + "total": 0.12854400000000002 }, "sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.0003402307692307694, + "avg": 0.000325, "count": 26, - "max": 0.000848, - "min": 0.000295, - "total": 0.008846000000000003 + "max": 0.000533, + "min": 0.0003, + "total": 0.00845 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.088064, + "avg": 0.0888195, "count": 2, - 
"max": 0.088368, - "min": 0.08776, - "total": 0.176128 + "max": 0.090207, + "min": 0.087432, + "total": 0.177639 }, "sirius::Radial_integrals|aug": { - "avg": 0.47033400000000003, + "avg": 0.47323400000000004, "count": 2, - "max": 0.532288, - "min": 0.40838, - "total": 0.9406680000000001 + "max": 0.537019, + "min": 0.409449, + "total": 0.9464680000000001 }, "sirius::Radial_integrals|beta": { - "avg": 0.08264250000000001, + "avg": 0.08576600000000001, "count": 2, - "max": 0.085318, - "min": 0.079967, - "total": 0.16528500000000002 + "max": 0.087453, + "min": 0.084079, + "total": 0.17153200000000002 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.019806499999999998, + "avg": 0.0223115, "count": 2, - "max": 0.021491, - "min": 0.018122, - "total": 0.039612999999999995 + "max": 0.024372, + "min": 0.020251, + "total": 0.044623 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.018057, + "avg": 0.018067, "count": 1, - "max": 0.018057, - "min": 0.018057, - "total": 0.018057 + "max": 0.018067, + "min": 0.018067, + "total": 0.018067 }, "sirius::Radial_integrals|vloc": { - "avg": 0.065079, + "avg": 0.0837915, "count": 2, - "max": 0.069726, - "min": 0.060432, - "total": 0.130158 + "max": 0.084791, + "min": 0.082792, + "total": 0.167583 }, "sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.001584, + "avg": 0.001474, "count": 1, - "max": 0.001584, - "min": 0.001584, - "total": 0.001584 + "max": 0.001474, + "min": 0.001474, + "total": 0.001474 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000536, + "avg": 0.000261, "count": 1, - "max": 0.000536, - "min": 0.000536, - "total": 0.000536 + "max": 0.000261, + "min": 0.000261, + "total": 0.000261 }, "sirius::Simulation_context::init_fft": { - "avg": 0.020863, + "avg": 0.013281, "count": 1, - "max": 0.020863, - "min": 0.020863, - "total": 0.020863 + "max": 0.013281, + "min": 0.013281, + "total": 0.013281 }, "sirius::Simulation_context::initialize": { - "avg": 1.655469, + "avg": 1.720033, 
"count": 1, - "max": 1.655469, - "min": 1.655469, - "total": 1.655469 + "max": 1.720033, + "min": 1.720033, + "total": 1.720033 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.038404, - "count": 3, - "max": 0.039819, - "min": 0.037231, - "total": 0.115212 + "avg": 0.0001773333333333333, + "count": 6, + "max": 0.000299, + "min": 0.000114, + "total": 0.0010639999999999998 }, "sirius::Simulation_context::update": { - "avg": 0.128457, + "avg": 0.102499, "count": 1, - "max": 0.128457, - "min": 0.128457, - "total": 0.128457 + "max": 0.102499, + "min": 0.102499, + "total": 0.102499 }, "sirius::Simulation_parameters::import": { - "avg": 0.000296, + "avg": 0.000172, "count": 1, - "max": 0.000296, - "min": 0.000296, - "total": 0.000296 + "max": 0.000172, + "min": 0.000172, + "total": 0.000172 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.002574318553092182, - "count": 857, - "max": 0.0054, - "min": 0.000547, - "total": 2.206191 + "avg": 0.0004563237327188933, + "count": 868, + "max": 0.001263, + "min": 0.000128, + "total": 0.39608899999999936 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 3.2749999999999996e-05, + "avg": 2e-05, "count": 4, - "max": 3.8e-05, - "min": 3e-05, - "total": 0.00013099999999999999 - }, - "sirius::Smooth_periodic_function_gradient|dot": { - "avg": 0.0003976208791208793, - "count": 182, - "max": 0.000537, - "min": 0.000291, - "total": 0.07236700000000003 - }, - "sirius::Smooth_periodic_function_gradient|gradient": { - "avg": 0.00043971538461538476, - "count": 130, - "max": 0.00074, - "min": 0.00014, - "total": 0.05716300000000002 - }, - "sirius::Smooth_periodic_function_gradient|laplacian": { - "avg": 0.00025044230769230764, - "count": 52, - "max": 0.000317, - "min": 0.000219, - "total": 0.013022999999999998 + "max": 2.1e-05, + "min": 1.9e-05, + "total": 8e-05 }, "sirius::Smooth_periodic_function|inner": { - "avg": 5.712144702842378e-05, - "count": 387, - "max": 0.000141, - "min": 4.5e-05, - 
"total": 0.022106000000000004 + "avg": 6.922820512820517e-05, + "count": 390, + "max": 0.000153, + "min": 5.1e-05, + "total": 0.02699900000000002 + }, + "sirius::Stress|ewald": { + "avg": 0.000651, + "count": 1, + "max": 0.000651, + "min": 0.000651, + "total": 0.000651 + }, + "sirius::Stress|har": { + "avg": 0.00026, + "count": 1, + "max": 0.00026, + "min": 0.00026, + "total": 0.00026 + }, + "sirius::Stress|kin": { + "avg": 0.001133, + "count": 1, + "max": 0.001133, + "min": 0.001133, + "total": 0.001133 + }, + "sirius::Stress|nonloc": { + "avg": 0.024665, + "count": 1, + "max": 0.024665, + "min": 0.024665, + "total": 0.024665 + }, + "sirius::Stress|us": { + "avg": 0.290691, + "count": 1, + "max": 0.290691, + "min": 0.290691, + "total": 0.290691 + }, + "sirius::Stress|us|gemm": { + "avg": 0.0009247777777777778, + "count": 18, + "max": 0.001029, + "min": 0.000884, + "total": 0.016646 + }, + "sirius::Stress|us|phase_fac": { + "avg": 9.4e-05, + "count": 1, + "max": 9.4e-05, + "min": 9.4e-05, + "total": 9.4e-05 + }, + "sirius::Stress|us|prepare": { + "avg": 0.00010566666666666668, + "count": 18, + "max": 0.000129, + "min": 8.9e-05, + "total": 0.0019020000000000003 + }, + "sirius::Stress|vloc": { + "avg": 0.000712, + "count": 1, + "max": 0.000712, + "min": 0.000712, + "total": 0.000712 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.000362, + "avg": 0.0003345, "count": 2, - "max": 0.00057, - "min": 0.000154, - "total": 0.000724 + "max": 0.000576, + "min": 9.3e-05, + "total": 0.000669 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.010941, + "avg": 0.030998, "count": 2, - "max": 0.01099, - "min": 0.010892, - "total": 0.021882 + "max": 0.031815, + "min": 0.030181, + "total": 0.061996 }, "sirius::Unit_cell::initialize": { - "avg": 0.033902, + "avg": 0.057125, "count": 1, - "max": 0.033902, - "min": 0.033902, - "total": 0.033902 + "max": 0.057125, + "min": 0.057125, + "total": 0.057125 }, "sirius::Unit_cell::update": { - "avg": 0.011313, + "avg": 0.0313545, 
"count": 2, - "max": 0.011474, - "min": 0.011152, - "total": 0.022626 + "max": 0.032401, + "min": 0.030308, + "total": 0.062709 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.010911500000000001, + "avg": 0.030973, "count": 2, - "max": 0.010958, - "min": 0.010865, - "total": 0.021823000000000002 + "max": 0.031795, + "min": 0.030151, + "total": 0.061946 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.010716, + "avg": 0.030747, "count": 2, - "max": 0.010794, - "min": 0.010638, - "total": 0.021432 + "max": 0.031479, + "min": 0.030015, + "total": 0.061494 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 5.1e-05, + "avg": 4.5e-05, "count": 2, - "max": 6.8e-05, - "min": 3.4e-05, - "total": 0.000102 + "max": 6.2e-05, + "min": 2.8e-05, + "total": 9e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.00011300000000000001, + "avg": 0.000147, "count": 2, - "max": 0.000133, - "min": 9.3e-05, - "total": 0.00022600000000000002 + "max": 0.000208, + "min": 8.6e-05, + "total": 0.000294 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 1.9e-05, + "avg": 1.2499999999999999e-05, "count": 2, - "max": 2.6e-05, - "min": 1.2e-05, - "total": 3.8e-05 + "max": 1.4e-05, + "min": 1.1e-05, + "total": 2.4999999999999998e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.005056120000000001, + "avg": 0.0028442200000000006, "count": 50, - "max": 0.007187, - "min": 0.004472, - "total": 0.25280600000000003 + "max": 0.004088, + "min": 0.002672, + "total": 0.14221100000000003 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.002722379999999999, + "avg": 0.0024319800000000007, "count": 50, - "max": 0.004158, - "min": 0.002226, - "total": 0.13611899999999996 + "max": 0.003487, + "min": 0.002288, + "total": 0.12159900000000003 }, "sirius::Unit_cell_symmetry::symmetrize_vector_function_pw_1c": { - "avg": 0.0051274400000000005, + "avg": 0.0027963799999999994, 
"count": 50, - "max": 0.005938, - "min": 0.004585, - "total": 0.25637200000000004 + "max": 0.003487, + "min": 0.002645, + "total": 0.13981899999999997 + }, + "sirius::dot": { + "avg": 0.0006034065934065934, + "count": 182, + "max": 0.00082, + "min": 0.000514, + "total": 0.10982 + }, + "sirius::gradient": { + "avg": 0.0011047786259541987, + "count": 131, + "max": 0.001843, + "min": 0.000163, + "total": 0.14472600000000002 + }, + "sirius::laplacian": { + "avg": 0.00055775, + "count": 52, + "max": 0.000663, + "min": 0.000509, + "total": 0.029003 } } } \ No newline at end of file diff --git a/verification/test7/sirius.json b/verification/test7/sirius.json index a2b356809..d6d478451 100644 --- a/verification/test7/sirius.json +++ b/verification/test7/sirius.json @@ -4,7 +4,9 @@ "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", "gen_evp_solver_type" : "lapack", - "verbosity" : 1 + "verbosity" : 1, + "print_forces" : true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test8/output_ref.json b/verification/test8/output_ref.json index 4468b79f5..bc709166a 100644 --- a/verification/test8/output_ref.json +++ b/verification/test8/output_ref.json @@ -1,814 +1,1024 @@ { - "build_date": "Thu, 15 Feb 2018 13:44:11", "comm_world_size": 1, - "git_hash": "fd622cf4fe7411690c449c2f12c73d1fd5d5ce8c", + "counters": { + "band_evp_work_count": 206.74609375, + "local_operator_num_applied": 203 + }, + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, - "band_gap": 0.0768573738727659, + "band_gap": 0.07685737152320804, "chemical_formula": "Si2", + "converged": true, "core_leakage": 0.0, - "efermi": 0.294966790637831, + "efermi": 0.2949667899039599, "energy": { "bxc": 0.0, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": 1.1587864260367, - "ewald": -8.4004646849006, - "exc": -4.43243958137187, - "kin": 5.68042667423368, - "total": -8.98813303367845, - "veff": -4.52164024819698, - "vha": 1.66545907662844, - "vxc": 
-3.51871434487154 - }, - "fft_coarse_grid": [30, 30, 30], - "fft_grid": [48, 48, 48], - "mpi_grid": [1, 1], + "eval_sum": 1.1587864307807658, + "ewald": -8.40046462950163, + "exc": -4.432440611845719, + "kin": 5.68042664315557, + "total": -8.988134017619515, + "veff": -4.5216402123748045, + "vha": 1.665459055656145, + "vxc": -3.5187143207751395 + }, + "fft_coarse_grid": [30,30,30], + "fft_grid": [50,50,50], + "forces": [ + [1.9482804436698966e-15,8.198627173705361e-16,3.1597147764562433e-16], + [-3.225976690782158e-15,-1.8858918902562784e-15,-1.9400374061170266e-15] + ], + "mpi_grid": [1,1], "num_atoms": 2, "num_bands": 8, "num_fv_states": 8, + "num_scf_iterations": 12, "omega": 270.011394, - "pw_cutoff": 20.0 + "pw_cutoff": 20.0, + "stress": [ + [-0.0013451382818108933,6.269303204385533e-38,5.385290448032832e-28], + [6.269303204385533e-38,-0.0013451382818108933,5.385290444427983e-28], + [5.385290448032832e-28,5.385290444427983e-28,-0.0013451382818108898] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "+global_timer": { - "avg": 3.890531, - "count": 1, - "max": 3.890531, - "min": 3.890531, - "total": 3.890531 - }, - "Eigensolver_lapack::solve_std": { - "avg": 9.345e-05, + "Eigensolver_lapack|zheevr": { + "avg": 7.735000000000002e-05, "count": 20, - "max": 0.000173, - "min": 6.4e-05, - "total": 0.001869 + "max": 0.000164, + "min": 4.8e-05, + "total": 0.0015470000000000004 }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 8.695e-05, - "count": 20, - "max": 0.000167, - "min": 6e-05, - "total": 0.001739 + "Eigensolver_lapack|zhegvx": { + "avg": 7.985714285714284e-05, + "count": 14, + "max": 0.000263, + "min": 5.7e-05, + "total": 0.0011179999999999999 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0025345, + "avg": 0.0047235, "count": 2, - "max": 0.003808, - "min": 0.001261, - "total": 0.005069 + "max": 0.005939, + "min": 0.003508, + "total": 0.009447 }, "sddk::FFT3D::prepare": { - "avg": 5.97636363636364e-05, + "avg": 6.401818181818183e-05, "count": 55, - "max": 
0.000148, - "min": 3.5e-05, - "total": 0.003287 + "max": 9.7e-05, + "min": 4.8e-05, + "total": 0.0035210000000000007 }, "sddk::FFT3D::prepare|cpu": { - "avg": 5.67454545454545e-05, + "avg": 5.898181818181819e-05, "count": 55, - "max": 0.000138, - "min": 3.3e-05, - "total": 0.003121 + "max": 9.1e-05, + "min": 4.4e-05, + "total": 0.0032440000000000004 }, "sddk::FFT3D::transform": { - "avg": 0.000409672268907563, - "count": 595, - "max": 0.001424, - "min": 0.000273, - "total": 0.243755 + "avg": 0.0003392068403908798, + "count": 614, + "max": 0.001801, + "min": 0.000228, + "total": 0.2082730000000002 }, "sddk::FFT3D::transform_xy": { - "avg": 0.000252576470588235, - "count": 595, - "max": 0.000896, - "min": 0.000157, - "total": 0.150283 + "avg": 0.000228998371335505, + "count": 614, + "max": 0.000983, + "min": 0.000151, + "total": 0.14060500000000006 }, "sddk::FFT3D::transform_z": { - "avg": 0.000150571428571429, - "count": 595, - "max": 0.000518, - "min": 9.2e-05, - "total": 0.0895900000000001 + "avg": 0.00010257328990228, + "count": 614, + "max": 0.000807, + "min": 5.7e-05, + "total": 0.06297999999999992 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.00014801512605042, - "count": 595, - "max": 0.000512, - "min": 9e-05, - "total": 0.088069 + "avg": 9.941042345276864e-05, + "count": 614, + "max": 0.000784, + "min": 5.5e-05, + "total": 0.061037999999999946 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.000145220168067227, - "count": 595, - "max": 0.000507, - "min": 8.8e-05, - "total": 0.086406 + "avg": 9.5828990228013e-05, + "count": 614, + "max": 0.000744, + "min": 5.1e-05, + "total": 0.05883899999999998 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.000716333333333333, - "count": 3, - "max": 0.001547, - "min": 0.000204, - "total": 0.002149 + "avg": 0.000726, + "count": 6, + "max": 0.001568, + "min": 0.000201, + "total": 0.004356 }, "sddk::Gvec::init": { - "avg": 0.00211033333333333, + "avg": 0.002445333333333333, "count": 3, - "max": 0.00473, - "min": 
0.000423, - "total": 0.006331 + "max": 0.005488, + "min": 0.000412, + "total": 0.007336 }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.00040495, - "count": 20, - "max": 0.000979, - "min": 8.9e-05, - "total": 0.008099 + "sddk::inner": { + "avg": 2.8272727272727264e-05, + "count": 88, + "max": 7.3e-05, + "min": 4e-06, + "total": 0.0024879999999999993 + }, + "sddk::inner|local": { + "avg": 2.5215909090909073e-05, + "count": 88, + "max": 7e-05, + "min": 3e-06, + "total": 0.0022189999999999983 }, "sddk::matrix_storage::matrix_storage": { - "avg": 1.26506024096385e-06, + "avg": 1.2530120481927692e-06, "count": 83, - "max": 1.2e-05, + "max": 5e-06, "min": 0.0, - "total": 0.000105 + "total": 0.00010399999999999984 }, "sddk::matrix_storage::remap_backward": { - "avg": 5.58823529411765e-07, + "avg": 1.2352941176470582e-06, "count": 34, - "max": 1e-06, + "max": 1.7e-05, "min": 0.0, - "total": 1.9e-05 + "total": 4.199999999999998e-05 }, "sddk::matrix_storage::remap_forward": { - "avg": 1.57446808510638e-06, + "avg": 4.5319148936170215e-06, "count": 47, + "max": 8e-06, + "min": 3e-06, + "total": 0.00021300000000000003 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 1.0864197530864186e-06, + "count": 81, "max": 3e-06, + "min": 0.0, + "total": 8.79999999999999e-05 + }, + "sddk::orthogonalize": { + "avg": 0.00016255, + "count": 20, + "max": 0.00036, + "min": 7.6e-05, + "total": 0.003251 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 4.700000000000001e-06, + "count": 20, + "max": 2.3e-05, "min": 1e-06, - "total": 7.4e-05 + "total": 9.400000000000001e-05 + }, + "sddk::orthogonalize|transform": { + "avg": 2.7049999999999997e-05, + "count": 20, + "max": 6.4e-05, + "min": 7e-06, + "total": 0.0005409999999999999 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.003952, + "avg": 0.004179, "count": 1, - "max": 0.003952, - "min": 0.003952, - "total": 0.003952 + "max": 0.004179, + "min": 0.004179, + "total": 0.004179 }, "sddk::remap_gvec_to_shells|remap_backward": { - 
"avg": 0.0003945, + "avg": 0.00036353846153846153, "count": 26, - "max": 0.000706, - "min": 0.000352, - "total": 0.010257 + "max": 0.000596, + "min": 0.000304, + "total": 0.009452 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.000400615384615385, + "avg": 0.00038630769230769236, "count": 26, - "max": 0.000508, - "min": 0.000373, - "total": 0.010416 + "max": 0.00063, + "min": 0.000322, + "total": 0.010044 }, - "sddk::wave_functions::inner": { - "avg": 8.18181818181818e-05, - "count": 88, - "max": 0.00028, - "min": 2e-06, - "total": 0.0072 - }, - "sddk::wave_functions::transform": { - "avg": 0.000182796296296296, + "sddk::transform": { + "avg": 5.251851851851851e-05, "count": 54, - "max": 0.000539, - "min": 3.9e-05, - "total": 0.009871 + "max": 0.000196, + "min": 2.2e-05, + "total": 0.0028359999999999995 }, - "sddk::wave_functions::transform|init": { - "avg": 1.17592592592593e-05, + "sddk::transform|init": { + "avg": 8.462962962962965e-06, "count": 54, - "max": 0.000109, - "min": 2e-06, - "total": 0.000635 + "max": 3.1e-05, + "min": 1e-06, + "total": 0.0004570000000000001 + }, + "sddk::transform|local": { + "avg": 1.7684210526315787e-05, + "count": 114, + "max": 5.5e-05, + "min": 9e-06, + "total": 0.0020159999999999996 }, "sirius::Atom_type::init": { - "avg": 0.029416, + "avg": 0.022532, "count": 1, - "max": 0.029416, - "min": 0.029416, - "total": 0.029416 + "max": 0.022532, + "min": 0.022532, + "total": 0.022532 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.110899, + "avg": 0.089295, "count": 1, - "max": 0.110899, - "min": 0.110899, - "total": 0.110899 + "max": 0.089295, + "min": 0.089295, + "total": 0.089295 }, - "sirius::Band::diag_pseudo_potential": { - "avg": 0.0190069230769231, - "count": 13, - "max": 0.03627, - "min": 0.009386, - "total": 0.24709 + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.066819, + "count": 3, + "max": 0.073569, + "min": 0.061647, + "total": 0.200457 + }, + 
"sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.06145599999999999, + "count": 3, + "max": 0.068546, + "min": 0.057271, + "total": 0.18436799999999998 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.062275, + "count": 1, + "max": 0.062275, + "min": 0.062275, + "total": 0.062275 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.01891, + "avg": 0.014178384615384617, "count": 13, - "max": 0.036188, - "min": 0.009308, - "total": 0.24583 + "max": 0.027862, + "min": 0.008261, + "total": 0.184319 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 1.43076923076923e-05, + "avg": 2.376923076923077e-05, "count": 13, - "max": 2.3e-05, - "min": 1e-05, - "total": 0.000186 + "max": 2.9e-05, + "min": 1.9e-05, + "total": 0.00030900000000000003 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 8.35151515151515e-05, + "avg": 7.533333333333334e-05, "count": 33, - "max": 0.000175, - "min": 6.3e-05, - "total": 0.002756 + "max": 0.000168, + "min": 5e-05, + "total": 0.0024860000000000004 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.0175916923076923, + "avg": 0.013070076923076924, "count": 13, - "max": 0.035019, - "min": 0.008079, - "total": 0.228692 + "max": 0.026509, + "min": 0.00696, + "total": 0.169911 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.000185769230769231, - "count": 13, - "max": 0.000342, - "min": 0.000107, - "total": 0.002415 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 1.60769230769231e-05, - "count": 13, - "max": 3.4e-05, - "min": 1.3e-05, - "total": 0.000209 - }, - "sirius::Band::get_h_diag": { - "avg": 0.000663923076923077, - "count": 13, - "max": 0.000916, - "min": 0.000561, - "total": 0.008631 - }, - "sirius::Band::get_o_diag": { - "avg": 0.000601615384615385, + "avg": 3.9384615384615384e-05, "count": 13, - "max": 0.000769, - "min": 0.000513, - "total": 0.007821 + "max": 6.2e-05, + 
"min": 2.4e-05, + "total": 0.000512 }, "sirius::Band::initialize_subspace": { - "avg": 0.01097, + "avg": 0.007421, "count": 1, - "max": 0.01097, - "min": 0.01097, - "total": 0.01097 + "max": 0.007421, + "min": 0.007421, + "total": 0.007421 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.010096, + "avg": 0.006742, "count": 1, - "max": 0.010096, - "min": 0.010096, - "total": 0.010096 + "max": 0.006742, + "min": 0.006742, + "total": 0.006742 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.000883, + "avg": 0.000465, "count": 1, - "max": 0.000883, - "min": 0.000883, - "total": 0.000883 + "max": 0.000465, + "min": 0.000465, + "total": 0.000465 }, "sirius::Band::residuals": { - "avg": 0.000286060606060606, + "avg": 0.00022090909090909097, "count": 33, - "max": 0.000773, + "max": 0.000424, "min": 0.0, - "total": 0.00944 + "total": 0.007290000000000002 }, "sirius::Band::residuals_aux": { - "avg": 0.00029925, + "avg": 0.00030895, "count": 20, - "max": 0.000378, - "min": 0.000217, - "total": 0.005985 + "max": 0.000362, + "min": 0.000268, + "total": 0.0061790000000000005 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.000195770833333333, + "avg": 0.00013468750000000001, "count": 48, - "max": 0.000402, - "min": 9.6e-05, - "total": 0.009397 + "max": 0.000248, + "min": 2.7e-05, + "total": 0.006465 }, - "sirius::Band::solve_for_kset": { - "avg": 0.0196206923076923, + "sirius::Band::solve": { + "avg": 0.014831846153846152, "count": 13, - "max": 0.036821, - "min": 0.009981, - "total": 0.255069 + "max": 0.028563, + "min": 0.008967, + "total": 0.19281399999999999 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000554, + "avg": 0.00097, "count": 1, - "max": 0.000554, - "min": 0.000554, - "total": 0.000554 + "max": 0.00097, + "min": 0.00097, + "total": 0.00097 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.000549, + "avg": 0.000458, "count": 1, - "max": 0.000549, - "min": 0.000549, - "total": 0.000549 + "max": 0.000458, + "min": 
0.000458, + "total": 0.000458 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 4.81481481481482e-07, - "count": 27, - "max": 3e-06, + "avg": 3.870967741935485e-07, + "count": 31, + "max": 1e-06, "min": 0.0, - "total": 1.3e-05 + "total": 1.2000000000000002e-05 }, "sirius::Beta_projectors_base::generate": { - "avg": 0.00017031914893617, - "count": 47, - "max": 0.000193, - "min": 0.000159, - "total": 0.008005 + "avg": 0.00017584615384615384, + "count": 13, + "max": 0.000497, + "min": 0.000145, + "total": 0.002286 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.000372936170212766, - "count": 47, - "max": 0.000524, - "min": 5.4e-05, - "total": 0.017528 + "avg": 8.636065573770493e-05, + "count": 61, + "max": 0.000142, + "min": 3.7e-05, + "total": 0.005268000000000001 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 8.303278688524586e-05, + "count": 61, + "max": 0.000138, + "min": 3.5e-05, + "total": 0.0050649999999999975 }, "sirius::Beta_projectors_base::prepare": { - "avg": 1.88888888888889e-06, - "count": 27, - "max": 4.1e-05, - "min": 0.0, - "total": 5.1e-05 + "avg": 2e-06, + "count": 2, + "max": 2e-06, + "min": 2e-06, + "total": 4e-06 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.001661, + "count": 1, + "max": 0.001661, + "min": 0.001661, + "total": 0.001661 }, "sirius::Broyden1::mix": { - "avg": 0.000440923076923077, + "avg": 0.001370153846153846, "count": 13, - "max": 0.000896, - "min": 2.7e-05, - "total": 0.005732 + "max": 0.00291, + "min": 5.2e-05, + "total": 0.017811999999999998 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.000686, + "avg": 0.000464, "count": 1, - "max": 0.000686, - "min": 0.000686, - "total": 0.000686 + "max": 0.000464, + "min": 0.000464, + "total": 0.000464 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 2.514176, + "avg": 0.943058, "count": 1, - "max": 2.514176, - "min": 2.514176, - "total": 2.514176 + "max": 0.943058, + "min": 0.943058, + "total": 0.943058 }, 
"sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 0.192544461538462, + "avg": 0.07209300000000002, "count": 13, - "max": 0.221328, - "min": 0.180039, - "total": 2.503078 - }, - "sirius::DFT_ground_state::symmetrize": { - "avg": 0.0591704230769231, - "count": 26, - "max": 0.067948, - "min": 0.055723, - "total": 1.538431 + "max": 0.09488, + "min": 0.059726, + "total": 0.9372090000000002 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.000762, + "avg": 0.00023715384615384617, "count": 13, - "max": 0.000849, - "min": 0.000727, - "total": 0.009906 + "max": 0.000303, + "min": 0.000212, + "total": 0.0030830000000000002 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.00328407692307692, + "avg": 0.0031318461538461535, "count": 13, - "max": 0.003616, - "min": 0.002986, - "total": 0.042693 + "max": 0.003539, + "min": 0.002601, + "total": 0.04071399999999999 }, "sirius::Density::augment": { - "avg": 0.0246766923076923, + "avg": 0.024499076923076925, "count": 13, - "max": 0.036988, - "min": 0.022702, - "total": 0.320797 + "max": 0.042563, + "min": 0.02156, + "total": 0.31848800000000005 }, "sirius::Density::generate": { - "avg": 0.0293203846153846, + "avg": 0.028399769230769232, "count": 13, - "max": 0.041829, - "min": 0.027104, - "total": 0.381165 + "max": 0.04619, + "min": 0.024883, + "total": 0.369197 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.013545, + "avg": 0.001513, "count": 1, - "max": 0.013545, - "min": 0.013545, - "total": 0.013545 + "max": 0.001513, + "min": 0.001513, + "total": 0.001513 }, "sirius::Density::generate_rho_aug": { - "avg": 0.0245031538461538, + "avg": 0.024413538461538462, "count": 13, - "max": 0.03679, - "min": 0.022508, - "total": 0.318541 + "max": 0.042489, + "min": 0.021486, + "total": 0.317376 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.0125046153846154, + "avg": 0.012042461538461538, "count": 13, - "max": 0.025533, - "min": 0.011104, - "total": 0.16256 - }, - 
"sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.000230076923076923, - "count": 13, - "max": 0.000431, - "min": 0.000153, - "total": 0.002991 + "max": 0.034233, + "min": 0.00935, + "total": 0.156552 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.00424961538461538, + "avg": 0.003837923076923077, "count": 13, - "max": 0.005997, - "min": 0.0034, - "total": 0.055245 + "max": 0.005201, + "min": 0.002984, + "total": 0.049893 }, "sirius::Density::generate_valence": { - "avg": 0.0293134615384615, + "avg": 0.02839523076923077, "count": 13, - "max": 0.041812, - "min": 0.027099, - "total": 0.381075 + "max": 0.04618, + "min": 0.024879, + "total": 0.369138 }, "sirius::Density::initial_density": { - "avg": 0.019478, + "avg": 0.002718, "count": 1, - "max": 0.019478, - "min": 0.019478, - "total": 0.019478 + "max": 0.002718, + "min": 0.002718, + "total": 0.002718 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.002247, + "avg": 0.002212769230769231, "count": 13, - "max": 0.002323, - "min": 0.002228, - "total": 0.029211 + "max": 0.002778, + "min": 0.002011, + "total": 0.028766 + }, + "sirius::Density::update": { + "avg": 0.001539, + "count": 1, + "max": 0.001539, + "min": 0.001539, + "total": 0.001539 + }, + "sirius::Field4D::symmetrize": { + "avg": 0.005690538461538463, + "count": 26, + "max": 0.00751, + "min": 0.004962, + "total": 0.14795400000000003 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.002169, + "count": 1, + "max": 0.002169, + "min": 0.002169, + "total": 0.002169 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.001359, + "count": 1, + "max": 0.001359, + "min": 0.001359, + "total": 0.001359 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.001544, + "count": 1, + "max": 0.001544, + "min": 0.001544, + "total": 0.001544 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.001024, + "count": 1, + "max": 0.001024, + "min": 0.001024, + "total": 0.001024 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.009193, 
+ "count": 1, + "max": 0.009193, + "min": 0.009193, + "total": 0.009193 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.001188, + "count": 1, + "max": 0.001188, + "min": 0.001188, + "total": 0.001188 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.00600332352941176, + "avg": 0.004545823529411765, "count": 34, - "max": 0.008462, - "min": 0.001241, - "total": 0.204113 + "max": 0.006906, + "min": 0.000832, + "total": 0.15455800000000003 + }, + "sirius::Hamiltonian::get_h_diag": { + "avg": 0.0005371538461538462, + "count": 13, + "max": 0.000708, + "min": 0.000416, + "total": 0.006983 + }, + "sirius::Hamiltonian::get_o_diag": { + "avg": 0.000517, + "count": 13, + "max": 0.000669, + "min": 0.000402, + "total": 0.0067209999999999995 }, "sirius::K_point::K_point": { - "avg": 1e-06, + "avg": 4e-06, "count": 1, - "max": 1e-06, - "min": 1e-06, - "total": 1e-06 + "max": 4e-06, + "min": 4e-06, + "total": 4e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.000429, + "avg": 0.00043, "count": 1, - "max": 0.000429, - "min": 0.000429, - "total": 0.000429 + "max": 0.00043, + "min": 0.00043, + "total": 0.00043 }, "sirius::K_point::initialize": { - "avg": 0.001164, + "avg": 0.001696, "count": 1, - "max": 0.001164, - "min": 0.001164, - "total": 0.001164 + "max": 0.001696, + "min": 0.001696, + "total": 0.001696 }, - "sirius::K_point_set::K_point_set": { - "avg": 0.009514, + "sirius::K_point::update": { + "avg": 0.001231, "count": 1, - "max": 0.009514, - "min": 0.009514, - "total": 0.009514 + "max": 0.001231, + "min": 0.001231, + "total": 0.001231 }, "sirius::K_point_set::add_kpoint": { - "avg": 8e-06, + "avg": 1.6e-05, "count": 1, - "max": 8e-06, - "min": 8e-06, - "total": 8e-06 + "max": 1.6e-05, + "min": 1.6e-05, + "total": 1.6e-05 + }, + "sirius::K_point_set::create_k_mesh": { + "avg": 0.023917, + "count": 1, + "max": 0.023917, + "min": 0.023917, + "total": 0.023917 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 5.94615384615385e-05, + "avg": 
2.730769230769231e-05, "count": 13, - "max": 7e-05, - "min": 5.3e-05, - "total": 0.000773 + "max": 3.2e-05, + "min": 2.1e-05, + "total": 0.000355 + }, + "sirius::K_point_set::initialize": { + "avg": 0.001776, + "count": 1, + "max": 0.001776, + "min": 0.001776, + "total": 0.001776 }, "sirius::K_point_set::sync_band_energies": { - "avg": 3.46153846153846e-06, + "avg": 3.6923076923076925e-06, "count": 13, - "max": 8e-06, - "min": 3e-06, - "total": 4.5e-05 + "max": 1e-05, + "min": 2e-06, + "total": 4.8e-05 }, "sirius::Local_operator::apply_h": { - "avg": 0.00476691176470588, + "avg": 0.004240382352941176, "count": 34, - "max": 0.006919, - "min": 0.00078, - "total": 0.162075 + "max": 0.006551, + "min": 0.000641, + "total": 0.144173 }, "sirius::Local_operator::prepare": { - "avg": 0.000304928571428571, + "avg": 0.00027103571428571427, "count": 28, - "max": 0.000834, - "min": 2.9e-05, - "total": 0.008538 + "max": 0.000635, + "min": 2.4e-05, + "total": 0.007589 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 1e-06, + "avg": 1.1785714285714287e-06, "count": 28, - "max": 2e-06, + "max": 3e-06, "min": 0.0, - "total": 2.8e-05 + "total": 3.3e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.000346764705882353, + "avg": 9.522058823529416e-05, "count": 68, - "max": 0.000493, - "min": 8.2e-05, - "total": 0.02358 + "max": 0.00015, + "min": 6.6e-05, + "total": 0.0064750000000000025 }, "sirius::Periodic_function::add": { - "avg": 0.000133928571428571, + "avg": 8.52857142857143e-05, "count": 28, - "max": 0.000179, - "min": 7.8e-05, - "total": 0.00375 + "max": 0.000235, + "min": 6.2e-05, + "total": 0.0023880000000000004 }, "sirius::Periodic_function::inner": { - "avg": 0.000204368821292776, - "count": 263, - "max": 0.00036, - "min": 9.9e-05, - "total": 0.053749 + "avg": 8.945132743362831e-05, + "count": 113, + "max": 0.000155, + "min": 6.3e-05, + "total": 0.010107999999999999 }, "sirius::Periodic_function::integrate": { - "avg": 0.000124769230769231, - "count": 
13, - "max": 0.000191, - "min": 9.7e-05, - "total": 0.001622 + "avg": 7.842857142857143e-05, + "count": 14, + "max": 0.000131, + "min": 6.5e-05, + "total": 0.001098 }, "sirius::Potential::Potential": { - "avg": 0.171186, + "avg": 0.007503, "count": 1, - "max": 0.171186, - "min": 0.171186, - "total": 0.171186 + "max": 0.007503, + "min": 0.007503, + "total": 0.007503 }, "sirius::Potential::generate": { - "avg": 0.0177877142857143, + "avg": 0.011206142857142857, "count": 14, - "max": 0.02029, - "min": 0.016681, - "total": 0.249028 + "max": 0.015384, + "min": 0.009223, + "total": 0.156886 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.0102352857142857, + "avg": 0.003758714285714286, "count": 14, - "max": 0.011052, - "min": 0.009547, - "total": 0.143294 + "max": 0.004617, + "min": 0.003197, + "total": 0.052622 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 1e-06, + "avg": 4.285714285714285e-07, "count": 14, - "max": 1.2e-05, + "max": 1e-06, "min": 0.0, - "total": 1.4e-05 + "total": 5.999999999999999e-06 }, "sirius::Potential::generate_local_potential": { - "avg": 0.168408, + "avg": 0.002519, "count": 1, - "max": 0.168408, - "min": 0.168408, - "total": 0.168408 - }, - "sirius::Potential::init": { - "avg": 3e-06, - "count": 1, - "max": 3e-06, - "min": 3e-06, - "total": 3e-06 + "max": 0.002519, + "min": 0.002519, + "total": 0.002519 }, "sirius::Potential::poisson": { - "avg": 0.00164971428571429, + "avg": 0.0009882857142857143, "count": 14, - "max": 0.002021, - "min": 0.001502, - "total": 0.023096 + "max": 0.001397, + "min": 0.00079, + "total": 0.013836 + }, + "sirius::Potential::update": { + "avg": 0.002545, + "count": 1, + "max": 0.002545, + "min": 0.002545, + "total": 0.002545 }, "sirius::Potential::xc": { - "avg": 0.00421028571428571, + "avg": 0.005402785714285714, "count": 14, - "max": 0.006567, - "min": 0.003303, - "total": 0.058944 + "max": 0.007893, + "min": 0.003968, + "total": 0.075639 }, 
"sirius::Potential::xc_rg_nonmagnetic": { - "avg": 0.00420285714285714, + "avg": 0.005396785714285714, "count": 14, - "max": 0.006554, - "min": 0.003297, - "total": 0.05884 + "max": 0.007866, + "min": 0.003964, + "total": 0.075555 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.044523, - "count": 1, - "max": 0.044523, - "min": 0.044523, - "total": 0.044523 + "avg": 0.0495675, + "count": 2, + "max": 0.049601, + "min": 0.049534, + "total": 0.099135 }, "sirius::Radial_integrals|aug": { - "avg": 0.411228, - "count": 1, - "max": 0.411228, - "min": 0.411228, - "total": 0.411228 + "avg": 0.3882365, + "count": 2, + "max": 0.4392, + "min": 0.337273, + "total": 0.776473 }, "sirius::Radial_integrals|beta": { - "avg": 0.104275, + "avg": 0.086697, "count": 2, - "max": 0.105877, - "min": 0.102673, - "total": 0.20855 + "max": 0.088926, + "min": 0.084468, + "total": 0.173394 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.011851, - "count": 1, - "max": 0.011851, - "min": 0.011851, - "total": 0.011851 + "avg": 0.0179205, + "count": 2, + "max": 0.020676, + "min": 0.015165, + "total": 0.035841 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.015915, + "avg": 0.016288, "count": 1, - "max": 0.015915, - "min": 0.015915, - "total": 0.015915 + "max": 0.016288, + "min": 0.016288, + "total": 0.016288 }, "sirius::Radial_integrals|vloc": { - "avg": 0.166621, - "count": 1, - "max": 0.166621, - "min": 0.166621, - "total": 0.166621 + "avg": 0.056150500000000006, + "count": 2, + "max": 0.059505, + "min": 0.052796, + "total": 0.11230100000000001 }, - "sirius::Simulation_context::initialize": { - "avg": 0.834558, + "sirius::Simulation_context::init_atoms_to_grid_idx": { + "avg": 0.001129, "count": 1, - "max": 0.834558, - "min": 0.834558, - "total": 0.834558 + "max": 0.001129, + "min": 0.001129, + "total": 0.001129 }, - "sirius::Simulation_context_base::init_atoms_to_grid_idx": { - "avg": 0.001694, + "sirius::Simulation_context::init_comm": { + "avg": 0.00029, 
"count": 1, - "max": 0.001694, - "min": 0.001694, - "total": 0.001694 + "max": 0.00029, + "min": 0.00029, + "total": 0.00029 }, - "sirius::Simulation_context_base::initialize": { - "avg": 0.72364, + "sirius::Simulation_context::init_fft": { + "avg": 0.020996, "count": 1, - "max": 0.72364, - "min": 0.72364, - "total": 0.72364 + "max": 0.020996, + "min": 0.020996, + "total": 0.020996 }, - "sirius::Simulation_context_base::make_periodic_function": { - "avg": 0.000380333333333333, - "count": 3, - "max": 0.00039, - "min": 0.000367, - "total": 0.001141 + "sirius::Simulation_context::initialize": { + "avg": 1.400189, + "count": 1, + "max": 1.400189, + "min": 1.400189, + "total": 1.400189 + }, + "sirius::Simulation_context::make_periodic_function": { + "avg": 0.0003186666666666667, + "count": 6, + "max": 0.000492, + "min": 0.000162, + "total": 0.001912 + }, + "sirius::Simulation_context::update": { + "avg": 0.118141, + "count": 1, + "max": 0.118141, + "min": 0.118141, + "total": 0.118141 }, "sirius::Simulation_parameters::import": { - "avg": 0.001295, + "avg": 0.000189, "count": 1, - "max": 0.001295, - "min": 0.001295, - "total": 0.001295 + "max": 0.000189, + "min": 0.000189, + "total": 0.000189 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.000970611764705882, - "count": 85, - "max": 0.001675, - "min": 0.000329, - "total": 0.082502 + "avg": 0.0006792500000000003, + "count": 104, + "max": 0.001976, + "min": 0.00028, + "total": 0.07064200000000002 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 9.75e-05, + "avg": 3.7000000000000005e-05, "count": 2, - "max": 0.000118, - "min": 7.7e-05, - "total": 0.000195 + "max": 4e-05, + "min": 3.4e-05, + "total": 7.400000000000001e-05 + }, + "sirius::Smooth_periodic_function|inner": { + "avg": 8.533548387096773e-05, + "count": 155, + "max": 0.000151, + "min": 6.1e-05, + "total": 0.013226999999999997 + }, + "sirius::Stress|ewald": { + "avg": 0.001343, + "count": 1, + "max": 0.001343, + "min": 0.001343, + 
"total": 0.001343 }, - "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.000264, + "sirius::Stress|har": { + "avg": 0.000464, "count": 1, - "max": 0.000264, - "min": 0.000264, - "total": 0.000264 + "max": 0.000464, + "min": 0.000464, + "total": 0.000464 }, - "sirius::Unit_cell::get_symmetry": { - "avg": 0.010875, + "sirius::Stress|kin": { + "avg": 0.000125, + "count": 1, + "max": 0.000125, + "min": 0.000125, + "total": 0.000125 + }, + "sirius::Stress|nonloc": { + "avg": 0.00491, + "count": 1, + "max": 0.00491, + "min": 0.00491, + "total": 0.00491 + }, + "sirius::Stress|us": { + "avg": 0.291274, + "count": 1, + "max": 0.291274, + "min": 0.291274, + "total": 0.291274 + }, + "sirius::Stress|us|gemm": { + "avg": 0.002136222222222222, + "count": 9, + "max": 0.002647, + "min": 0.001855, + "total": 0.019226 + }, + "sirius::Stress|us|phase_fac": { + "avg": 0.000142, + "count": 1, + "max": 0.000142, + "min": 0.000142, + "total": 0.000142 + }, + "sirius::Stress|us|prepare": { + "avg": 0.0002008888888888889, + "count": 9, + "max": 0.000318, + "min": 0.00014, + "total": 0.0018080000000000001 + }, + "sirius::Stress|vloc": { + "avg": 0.001038, "count": 1, - "max": 0.010875, - "min": 0.010875, - "total": 0.010875 + "max": 0.001038, + "min": 0.001038, + "total": 0.001038 + }, + "sirius::Unit_cell::find_nearest_neighbours": { + "avg": 0.0002945, + "count": 2, + "max": 0.000489, + "min": 0.0001, + "total": 0.000589 + }, + "sirius::Unit_cell::get_symmetry": { + "avg": 0.023391000000000002, + "count": 2, + "max": 0.023736, + "min": 0.023046, + "total": 0.046782000000000004 }, "sirius::Unit_cell::initialize": { - "avg": 0.040586, + "avg": 0.046099, "count": 1, - "max": 0.040586, - "min": 0.040586, - "total": 0.040586 + "max": 0.046099, + "min": 0.046099, + "total": 0.046099 + }, + "sirius::Unit_cell::update": { + "avg": 0.023698499999999997, + "count": 2, + "max": 0.023853, + "min": 0.023544, + "total": 0.047396999999999995 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": 
{ - "avg": 0.010838, - "count": 1, - "max": 0.010838, - "min": 0.010838, - "total": 0.010838 + "avg": 0.023355, + "count": 2, + "max": 0.023691, + "min": 0.023019, + "total": 0.04671 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.010648, - "count": 1, - "max": 0.010648, - "min": 0.010648, - "total": 0.010648 + "avg": 0.023168, + "count": 2, + "max": 0.02351, + "min": 0.022826, + "total": 0.046336 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 3.6e-05, - "count": 1, - "max": 3.6e-05, - "min": 3.6e-05, - "total": 3.6e-05 + "avg": 4.85e-05, + "count": 2, + "max": 6.8e-05, + "min": 2.9e-05, + "total": 9.7e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 0.000136, - "count": 1, - "max": 0.000136, - "min": 0.000136, - "total": 0.000136 + "avg": 0.0001095, + "count": 2, + "max": 0.000117, + "min": 0.000102, + "total": 0.000219 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 7e-06, - "count": 1, - "max": 7e-06, - "min": 7e-06, - "total": 7e-06 + "avg": 9.5e-06, + "count": 2, + "max": 1e-05, + "min": 9e-06, + "total": 1.9e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.0591656153846154, + "avg": 0.005685846153846153, "count": 26, - "max": 0.067941, - "min": 0.055718, - "total": 1.538306 + "max": 0.007505, + "min": 0.004958, + "total": 0.147832 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.0581837307692308, + "avg": 0.004872576923076923, "count": 26, - "max": 0.066757, - "min": 0.054648, - "total": 1.512777 + "max": 0.006401, + "min": 0.004263, + "total": 0.126687 } } } \ No newline at end of file diff --git a/verification/test8/sirius.json b/verification/test8/sirius.json index 058138151..5c2a066f8 100644 --- a/verification/test8/sirius.json +++ b/verification/test8/sirius.json @@ -6,7 +6,9 @@ "verbosity" : 1, "verification" : 0, "print_memory_usage" : false, - "print_checksum" : false + "print_checksum" : false, + "print_forces" 
: true, + "print_stress" : true }, "parameters" : { diff --git a/verification/test9/output_ref.json b/verification/test9/output_ref.json index 3880e391e..1a8b607fa 100644 --- a/verification/test9/output_ref.json +++ b/verification/test9/output_ref.json @@ -1,925 +1,1086 @@ { - "build_date": "Thu, 6 Sep 2018 09:50:51", "comm_world_size": 1, "counters": { - "band_evp_work_count": 4401.776807843705, - "local_operator_num_applied": 14187 + "band_evp_work_count": 4933.5451231958, + "local_operator_num_applied": 16676 }, - "git_hash": "df65c9ae1af51c668b8f0cc9e4ea1e1bb142f720", + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", "ground_state": { "aw_cutoff": 7.0, "band_gap": 0.0, "chemical_formula": "Ni", "converged": true, "core_leakage": 0.0, - "efermi": 0.6565139588942949, + "efermi": 0.656514250007665, "energy": { - "bxc": -0.0010109827660166732, + "bxc": -0.001011402984635398, "core_eval_sum": 0.0, "enuc": 0.0, - "eval_sum": -10.66477709932868, - "ewald": -111.75579487449036, - "exc": -17.773642603325882, - "kin": 48.798393232383596, - "total": -171.62824549258403, - "veff": -59.46215934894626, - "vha": 107.41349934608857, - "vxc": -22.27170777483915 - }, - "fft_coarse_grid": [18,18,18], + "eval_sum": -10.664778763165614, + "ewald": -111.75579413957684, + "exc": -17.77364275522456, + "kin": 48.79839203324314, + "total": -171.6282459104131, + "veff": -59.462159393424116, + "vha": 107.413498470397, + "vxc": -22.27170757976776 + }, + "fft_coarse_grid": [20,20,20], "fft_grid": [40,40,40], + "forces": [ + [4.312324370118219e-13,4.31042704161251e-13,4.314009838610825e-13] + ], "mpi_grid": [1,1], "num_atoms": 1, "num_bands": 38, "num_fv_states": -1, - "num_scf_iterations": 20, + "num_scf_iterations": 25, "omega": 73.39284359469754, - "pw_cutoff": 25.0 + "pw_cutoff": 25.0, + "stress": [ + [-0.0019160915943802134,3.209883240645393e-35,2.710505431213764e-20], + [3.209883240645393e-35,-0.0019160915943802134,-2.25875452601153e-20], + 
[2.710505431213764e-20,-2.25875452601153e-20,-0.0019160915943802481] + ] }, "task": 0, "threads_per_rank": 8, "timers": { - "Eigensolver_lapack::solve_std": { - "avg": 0.0030363930635838154, - "count": 346, - "max": 0.008694, - "min": 0.00118, - "total": 1.0505920000000002 - }, - "Eigensolver_lapack::solve_std|zheevr": { - "avg": 0.0030222080924855485, - "count": 346, - "max": 0.008666, - "min": 0.001172, - "total": 1.0456839999999998 + "Eigensolver_lapack|zheevr": { + "avg": 0.0016954684466019409, + "count": 412, + "max": 0.005478, + "min": 0.000727, + "total": 0.6985329999999996 + }, + "Eigensolver_lapack|zhegvx": { + "avg": 0.0009102037037037042, + "count": 216, + "max": 0.001251, + "min": 0.000769, + "total": 0.1966040000000001 }, "sddk::FFT3D::FFT3D": { - "avg": 0.0022310000000000003, + "avg": 0.0031869999999999997, "count": 2, - "max": 0.002494, - "min": 0.001968, - "total": 0.004462000000000001 + "max": 0.004951, + "min": 0.001423, + "total": 0.0063739999999999995 }, "sddk::FFT3D::prepare": { - "avg": 7.272938144329889e-05, - "count": 388, - "max": 0.000238, - "min": 3.8e-05, - "total": 0.02821899999999997 + "avg": 6.442050209205025e-05, + "count": 478, + "max": 0.000215, + "min": 4.7e-05, + "total": 0.030793000000000022 }, "sddk::FFT3D::prepare|cpu": { - "avg": 6.88969072164948e-05, - "count": 388, - "max": 0.000234, - "min": 3.6e-05, - "total": 0.02673199999999998 + "avg": 5.9767782426778246e-05, + "count": 478, + "max": 0.000177, + "min": 4.3e-05, + "total": 0.028569 }, "sddk::FFT3D::transform": { - "avg": 0.0004096741146120022, - "count": 92643, - "max": 0.005248, - "min": 0.000204, - "total": 37.95343899999972 + "avg": 0.00014875313337052014, + "count": 109387, + "max": 0.002772, + "min": 9.5e-05, + "total": 16.271659000001087 }, "sddk::FFT3D::transform_xy": { - "avg": 0.00011670004209709307, - "count": 92643, - "max": 0.001511, - "min": 8.5e-05, - "total": 10.811442000000993 + "avg": 6.546954391290364e-05, + "count": 109387, + "max": 0.002187, + "min": 
3.4e-05, + "total": 7.161517000000791 }, "sddk::FFT3D::transform_z": { - "avg": 0.0002873646254978861, - "count": 92643, - "max": 0.004866, - "min": 9.3e-05, - "total": 26.62232100000066 + "avg": 7.73817821130533e-05, + "count": 109387, + "max": 0.001612, + "min": 4.2e-05, + "total": 8.464561000000561 }, "sddk::FFT3D::transform_z_serial": { - "avg": 0.0002846808285569428, - "count": 92643, - "max": 0.004854, - "min": 9.1e-05, - "total": 26.37368600000085 + "avg": 7.48567197199017e-05, + "count": 109387, + "max": 0.001609, + "min": 4e-05, + "total": 8.188352000000886 }, "sddk::FFT3D::transform_z_serial|cpu": { - "avg": 0.0002816616797815347, - "count": 92643, - "max": 0.004848, - "min": 8.9e-05, - "total": 26.09398300000072 + "avg": 7.205200800827431e-05, + "count": 109387, + "max": 0.0016, + "min": 3.8e-05, + "total": 7.881553000001102 }, "sddk::Gvec::find_gvec_shells": { - "avg": 0.00021379999999999997, + "avg": 0.00019810000000000004, "count": 20, - "max": 0.001152, - "min": 5.6e-05, - "total": 0.004275999999999999 + "max": 0.000844, + "min": 5.3e-05, + "total": 0.003962000000000001 }, "sddk::Gvec::init": { - "avg": 0.0007190000000000001, + "avg": 0.0005337000000000001, "count": 10, - "max": 0.004353, - "min": 0.000164, - "total": 0.007190000000000001 - }, - "sddk::Wave_functions::inner": { - "avg": 0.0010071100719424467, - "count": 1390, - "max": 0.005401, - "min": 1e-06, - "total": 1.3998830000000009 - }, - "sddk::Wave_functions::orthogonalize": { - "avg": 0.00469957803468208, - "count": 346, - "max": 0.014489, - "min": 0.0002, - "total": 1.6260539999999999 - }, - "sddk::Wave_functions::transform": { - "avg": 0.002170299097065463, - "count": 886, - "max": 0.0088, - "min": 0.000111, - "total": 1.9228850000000002 - }, - "sddk::Wave_functions::transform|init": { - "avg": 7.793792325056423e-05, - "count": 886, - "max": 0.000618, - "min": 1e-06, - "total": 0.0690529999999999 + "max": 0.002938, + "min": 0.000134, + "total": 0.0053370000000000015 + }, + "sddk::inner": 
{ + "avg": 0.00013706534772182227, + "count": 1668, + "max": 0.00039, + "min": 3e-06, + "total": 0.22862499999999955 + }, + "sddk::inner|local": { + "avg": 0.00013371642685851338, + "count": 1668, + "max": 0.000384, + "min": 2e-06, + "total": 0.22303900000000035 }, "sddk::matrix_storage::matrix_storage": { - "avg": 2.3707061068702924e-06, - "count": 2096, - "max": 0.000264, + "avg": 8.955745341614831e-07, + "count": 2576, + "max": 1.7e-05, "min": 0.0, - "total": 0.004969000000000133 + "total": 0.0023069999999999805 }, "sddk::matrix_storage::remap_backward": { - "avg": 5.325670498084356e-07, - "count": 1044, - "max": 1.3e-05, + "avg": 6.257961783439576e-07, + "count": 1256, + "max": 7e-06, "min": 0.0, - "total": 0.0005560000000000067 + "total": 0.0007860000000000109 }, "sddk::matrix_storage::remap_forward": { - "avg": 1.9137681159420233e-06, - "count": 1380, - "max": 4.4e-05, + "avg": 3.4043062200956807e-06, + "count": 1672, + "max": 4.3e-05, + "min": 1e-06, + "total": 0.005691999999999978 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 8.346994535519114e-07, + "count": 2928, + "max": 3.9e-05, "min": 0.0, - "total": 0.002640999999999992 + "total": 0.0024439999999999965 + }, + "sddk::orthogonalize": { + "avg": 0.0005975339805825236, + "count": 412, + "max": 0.001485, + "min": 0.000114, + "total": 0.24618399999999974 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 3.488592233009715e-05, + "count": 412, + "max": 0.00015, + "min": 1e-06, + "total": 0.014373000000000026 + }, + "sddk::orthogonalize|transform": { + "avg": 8.079611650485426e-05, + "count": 412, + "max": 0.000249, + "min": 5e-06, + "total": 0.033287999999999957 }, "sddk::remap_gvec_to_shells|init": { - "avg": 0.004211, + "avg": 0.002212, "count": 1, - "max": 0.004211, - "min": 0.004211, - "total": 0.004211 + "max": 0.002212, + "min": 0.002212, + "total": 0.002212 }, "sddk::remap_gvec_to_shells|remap_backward": { - "avg": 0.0012398452380952389, - "count": 168, - "max": 0.002114, - "min": 0.001037, - 
"total": 0.20829400000000015 + "avg": 0.0001973701923076922, + "count": 208, + "max": 0.000425, + "min": 0.000158, + "total": 0.04105299999999998 }, "sddk::remap_gvec_to_shells|remap_forward": { - "avg": 0.0012205535714285714, - "count": 168, - "max": 0.001811, - "min": 0.001023, - "total": 0.20505299999999999 + "avg": 0.0001936730769230768, + "count": 208, + "max": 0.000368, + "min": 0.000169, + "total": 0.04028399999999997 + }, + "sddk::transform": { + "avg": 0.00020157407407407404, + "count": 1080, + "max": 0.00062, + "min": 4.9e-05, + "total": 0.21769999999999998 + }, + "sddk::transform|init": { + "avg": 2.380370370370376e-05, + "count": 1080, + "max": 0.000415, + "min": 1e-06, + "total": 0.02570800000000006 + }, + "sddk::transform|local": { + "avg": 7.80454159592529e-05, + "count": 2356, + "max": 0.000489, + "min": 1.9e-05, + "total": 0.18387499999999982 }, "sirius::Atom_type::init": { - "avg": 0.023392, + "avg": 0.025208, "count": 1, - "max": 0.023392, - "min": 0.023392, - "total": 0.023392 + "max": 0.025208, + "min": 0.025208, + "total": 0.025208 }, "sirius::Augmentation_operator::generate_pw_coeffs": { - "avg": 0.072145, + "avg": 0.067034, "count": 1, - "max": 0.072145, - "min": 0.072145, - "total": 0.072145 + "max": 0.067034, + "min": 0.067034, + "total": 0.067034 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.04757549999999999, + "count": 12, + "max": 0.059723, + "min": 0.041093, + "total": 0.5709059999999999 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.04333099999999999, + "count": 12, + "max": 0.054546, + "min": 0.03684, + "total": 0.5199719999999999 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.056933, + "count": 1, + "max": 0.056933, + "min": 0.056933, + "total": 0.056933 }, "sirius::Band::diag_pseudo_potential_davidson": { - "avg": 0.26679659523809524, - "count": 168, - "max": 0.578207, - "min": 0.166411, - "total": 44.821827999999996 + 
"avg": 0.1202287163461538, + "count": 208, + "max": 0.223928, + "min": 0.057594, + "total": 25.00757299999999 }, "sirius::Band::diag_pseudo_potential_davidson|alloc": { - "avg": 0.00013795833333333347, - "count": 168, - "max": 0.000911, - "min": 0.000101, - "total": 0.02317700000000002 + "avg": 3.990865384615383e-05, + "count": 208, + "max": 8.9e-05, + "min": 3e-05, + "total": 0.008300999999999998 }, "sirius::Band::diag_pseudo_potential_davidson|evp": { - "avg": 0.002383642023346303, - "count": 514, - "max": 0.008697, - "min": 0.000826, - "total": 1.2251919999999996 + "avg": 0.0014356677419354832, + "count": 620, + "max": 0.005482, + "min": 0.000729, + "total": 0.8901139999999996 }, "sirius::Band::diag_pseudo_potential_davidson|iter": { - "avg": 0.2654392380952382, - "count": 168, - "max": 0.577359, - "min": 0.16562, - "total": 44.593792000000015 + "avg": 0.11956309615384608, + "count": 208, + "max": 0.222937, + "min": 0.057029, + "total": 24.869123999999985 }, "sirius::Band::diag_pseudo_potential_davidson|update_phi": { - "avg": 0.002479761904761904, - "count": 168, - "max": 0.00494, - "min": 0.001346, - "total": 0.41659999999999986 - }, - "sirius::Band::diag_pseudo_potential_davidson|wf": { - "avg": 3.817857142857143e-05, - "count": 168, - "max": 0.000148, - "min": 2.3e-05, - "total": 0.006414 + "avg": 0.00018023076923076922, + "count": 208, + "max": 0.000528, + "min": 9.8e-05, + "total": 0.037488 }, "sirius::Band::initialize_subspace": { - "avg": 1.091483, + "avg": 0.430926, "count": 1, - "max": 1.091483, - "min": 1.091483, - "total": 1.091483 + "max": 0.430926, + "min": 0.430926, + "total": 0.430926 }, "sirius::Band::initialize_subspace|kp": { - "avg": 0.13605075, + "avg": 0.053737375, "count": 8, - "max": 0.144128, - "min": 0.132203, - "total": 1.088406 + "max": 0.05484, + "min": 0.052523, + "total": 0.429899 }, "sirius::Band::initialize_subspace|kp|wf": { - "avg": 0.027489375000000003, + "avg": 0.000296125, "count": 8, - "max": 0.029968, - "min": 0.026036, - 
"total": 0.21991500000000003 + "max": 0.000334, + "min": 0.000268, + "total": 0.002369 }, "sirius::Band::residuals": { - "avg": 0.0019320116731517524, - "count": 514, - "max": 0.007195, + "avg": 0.0005777854838709669, + "count": 620, + "max": 0.001337, "min": 0.0, - "total": 0.9930540000000008 + "total": 0.35822699999999946 }, "sirius::Band::residuals_aux": { - "avg": 0.0009061428571428576, - "count": 364, - "max": 0.001738, - "min": 0.000403, - "total": 0.3298360000000002 + "avg": 0.0006156084070796459, + "count": 452, + "max": 0.000836, + "min": 0.000513, + "total": 0.278255 }, "sirius::Band::set_subspace_mtrx": { - "avg": 0.001477628939828081, - "count": 698, - "max": 0.005828, - "min": 0.000183, - "total": 1.0313850000000004 + "avg": 0.0002951409952606636, + "count": 844, + "max": 0.000637, + "min": 0.00014, + "total": 0.24909900000000007 }, "sirius::Band::solve": { - "avg": 2.1392889047619046, - "count": 21, - "max": 3.818727, - "min": 1.522265, - "total": 44.925067 + "avg": 0.9635754615384614, + "count": 26, + "max": 1.698416, + "min": 0.479334, + "total": 25.052961999999997 }, "sirius::Beta_projectors::Beta_projectors": { - "avg": 0.000184875, + "avg": 0.000222125, "count": 8, - "max": 0.000213, - "min": 0.000145, - "total": 0.001479 + "max": 0.000348, + "min": 0.000133, + "total": 0.001777 }, "sirius::Beta_projectors::generate_pw_coefs_t": { - "avg": 0.00018112499999999997, + "avg": 0.00018262500000000003, "count": 8, - "max": 0.00021, - "min": 0.000141, - "total": 0.0014489999999999998 + "max": 0.000284, + "min": 0.000108, + "total": 0.0014610000000000003 }, "sirius::Beta_projectors_base::dismiss": { - "avg": 3.982558139534876e-07, - "count": 344, - "max": 5e-06, + "avg": 3.815789473684202e-07, + "count": 456, + "max": 6e-06, "min": 0.0, - "total": 0.00013699999999999973 + "total": 0.00017399999999999962 }, "sirius::Beta_projectors_base::generate": { - "avg": 2.7610144927536306e-05, - "count": 690, - "max": 8.8e-05, - "min": 2.3e-05, - "total": 
0.01905100000000005 + "avg": 2.9259615384615396e-05, + "count": 104, + "max": 6.2e-05, + "min": 1.9e-05, + "total": 0.003043000000000001 }, "sirius::Beta_projectors_base::inner": { - "avg": 0.00018381739130434824, - "count": 1380, - "max": 0.001163, - "min": 8e-06, - "total": 0.25366800000000056 + "avg": 4.4521097046413545e-05, + "count": 1896, + "max": 0.000128, + "min": 7e-06, + "total": 0.08441200000000008 + }, + "sirius::Beta_projectors_base::local_inner_aux": { + "avg": 4.093934599156124e-05, + "count": 1896, + "max": 0.000121, + "min": 5e-06, + "total": 0.07762100000000012 }, "sirius::Beta_projectors_base::prepare": { - "avg": 9.534883720930269e-07, - "count": 344, - "max": 2.1e-05, - "min": 0.0, - "total": 0.00032800000000000125 + "avg": 1.5000000000000005e-06, + "count": 16, + "max": 2e-06, + "min": 1e-06, + "total": 2.4000000000000007e-05 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.000922375, + "count": 8, + "max": 0.00159, + "min": 0.000585, + "total": 0.007379 }, "sirius::Broyden1::mix": { - "avg": 0.002139047619047619, - "count": 21, - "max": 0.00307, - "min": 0.000307, - "total": 0.04492 - }, - "sirius::DFT_ground_state::compute_atomic_mag_mom": { - "avg": 0.011421136363636368, - "count": 22, - "max": 0.01295, - "min": 0.000348, - "total": 0.25126500000000007 + "avg": 0.002122269230769231, + "count": 26, + "max": 0.003687, + "min": 7e-05, + "total": 0.055179000000000006 }, "sirius::DFT_ground_state::ewald_energy": { - "avg": 0.038855, + "avg": 0.000531, "count": 1, - "max": 0.038855, - "min": 0.038855, - "total": 0.038855 + "max": 0.000531, + "min": 0.000531, + "total": 0.000531 }, "sirius::DFT_ground_state::scf_loop": { - "avg": 54.459923, + "avg": 29.559268, "count": 1, - "max": 54.459923, - "min": 54.459923, - "total": 54.459923 + "max": 29.559268, + "min": 29.559268, + "total": 29.559268 }, "sirius::DFT_ground_state::scf_loop|iteration": { - "avg": 2.5926808571428572, - "count": 21, - "max": 4.287296, - "min": 
1.965889, - "total": 54.446298000000006 + "avg": 1.1365299615384612, + "count": 26, + "max": 1.864239, + "min": 0.66581, + "total": 29.549778999999994 }, "sirius::Density::add_k_point_contribution_dm": { - "avg": 0.0005340833333333331, - "count": 168, - "max": 0.001112, - "min": 0.000418, - "total": 0.08972599999999997 + "avg": 0.0002818750000000001, + "count": 208, + "max": 0.000433, + "min": 0.000228, + "total": 0.05863000000000003 }, "sirius::Density::add_k_point_contribution_rg": { - "avg": 0.01734484523809524, - "count": 168, - "max": 0.027571, - "min": 0.01444, - "total": 2.9139340000000002 + "avg": 0.007623153846153839, + "count": 208, + "max": 0.010544, + "min": 0.006414, + "total": 1.5856159999999986 }, "sirius::Density::augment": { - "avg": 0.07885552380952382, - "count": 21, - "max": 0.083501, - "min": 0.072651, - "total": 1.6559660000000003 + "avg": 0.033785115384615386, + "count": 26, + "max": 0.045658, + "min": 0.030672, + "total": 0.878413 + }, + "sirius::Density::compute_atomic_mag_mom": { + "avg": 0.000224, + "count": 1, + "max": 0.000224, + "min": 0.000224, + "total": 0.000224 }, "sirius::Density::generate": { - "avg": 0.2249563333333333, - "count": 21, - "max": 0.253827, - "min": 0.214597, - "total": 4.724082999999999 + "avg": 0.09806865384615386, + "count": 26, + "max": 0.114156, + "min": 0.088864, + "total": 2.5497850000000004 }, "sirius::Density::generate_pseudo_core_charge_density": { - "avg": 0.041356, + "avg": 0.001122, "count": 1, - "max": 0.041356, - "min": 0.041356, - "total": 0.041356 + "max": 0.001122, + "min": 0.001122, + "total": 0.001122 }, "sirius::Density::generate_rho_aug": { - "avg": 0.07818133333333332, - "count": 21, - "max": 0.082828, - "min": 0.071889, - "total": 1.6418079999999997 + "avg": 0.033503346153846154, + "count": 26, + "max": 0.045371, + "min": 0.030394, + "total": 0.8710870000000001 }, "sirius::Density::generate_rho_aug|gemm": { - "avg": 0.006736011904761906, - "count": 84, - "max": 0.019566, - "min": 0.002998, - 
"total": 0.5658250000000001 - }, - "sirius::Density::generate_rho_aug|phase_fac": { - "avg": 0.04035757142857144, - "count": 21, - "max": 0.043463, - "min": 0.039229, - "total": 0.8475090000000002 + "avg": 0.005936259615384614, + "count": 104, + "max": 0.020818, + "min": 0.001935, + "total": 0.6173709999999999 }, "sirius::Density::generate_rho_aug|sum": { - "avg": 0.002028916666666666, - "count": 84, - "max": 0.003517, - "min": 0.0016, - "total": 0.17042899999999994 + "avg": 0.001836836538461538, + "count": 104, + "max": 0.003942, + "min": 0.001365, + "total": 0.19103099999999995 }, "sirius::Density::generate_valence": { - "avg": 0.22494938095238093, - "count": 21, - "max": 0.253823, - "min": 0.214593, - "total": 4.723936999999999 + "avg": 0.09806403846153847, + "count": 26, + "max": 0.114152, + "min": 0.088861, + "total": 2.549665 }, "sirius::Density::initial_density": { - "avg": 0.05643, + "avg": 0.006274, "count": 1, - "max": 0.05643, - "min": 0.05643, - "total": 0.05643 + "max": 0.006274, + "min": 0.006274, + "total": 0.006274 }, "sirius::Density::symmetrize_density_matrix": { - "avg": 0.0041038095238095235, - "count": 21, - "max": 0.004973, - "min": 0.003577, - "total": 0.08617999999999999 + "avg": 0.0037466538461538466, + "count": 26, + "max": 0.004637, + "min": 0.003357, + "total": 0.09741300000000001 }, "sirius::Density::update": { - "avg": 0.041605, + "avg": 0.001137, "count": 1, - "max": 0.041605, - "min": 0.041605, - "total": 0.041605 + "max": 0.001137, + "min": 0.001137, + "total": 0.001137 }, "sirius::Field4D::symmetrize": { - "avg": 0.016069833333333332, - "count": 42, - "max": 0.018568, - "min": 0.014456, - "total": 0.674933 + "avg": 0.0072001538461538474, + "count": 52, + "max": 0.012111, + "min": 0.006337, + "total": 0.3744080000000001 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.001065, + "count": 1, + "max": 0.001065, + "min": 0.001065, + "total": 0.001065 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.00084, + "count": 1, + 
"max": 0.00084, + "min": 0.00084, + "total": 0.00084 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.018368, + "count": 1, + "max": 0.018368, + "min": 0.018368, + "total": 0.018368 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.000335, + "count": 1, + "max": 0.000335, + "min": 0.000335, + "total": 0.000335 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.020247, + "count": 1, + "max": 0.020247, + "min": 0.020247, + "total": 0.020247 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.000683, + "count": 1, + "max": 0.000683, + "min": 0.000683, + "total": 0.000683 }, "sirius::Hamiltonian::apply_h_s": { - "avg": 0.07672624712643682, - "count": 522, - "max": 0.141204, - "min": 0.003053, - "total": 40.05110100000002 + "avg": 0.037388304140127386, + "count": 628, + "max": 0.062024, + "min": 0.001618, + "total": 23.479854999999997 }, "sirius::Hamiltonian::get_h_diag": { - "avg": 0.0005703035714285714, - "count": 168, - "max": 0.000885, - "min": 0.000382, - "total": 0.09581100000000001 + "avg": 0.0003688317307692305, + "count": 208, + "max": 0.000866, + "min": 0.000304, + "total": 0.07671699999999995 }, "sirius::Hamiltonian::get_o_diag": { - "avg": 0.0002866845238095238, - "count": 168, - "max": 0.000455, - "min": 0.000189, - "total": 0.048163 - }, - "sirius::Hamiltonian::prepare": { - "avg": 1.6181818181818178e-05, - "count": 22, - "max": 2.8e-05, - "min": 1.4e-05, - "total": 0.0003559999999999999 + "avg": 0.00017999038461538467, + "count": 208, + "max": 0.000271, + "min": 0.000145, + "total": 0.03743800000000001 }, "sirius::K_point::K_point": { - "avg": 8.75e-07, + "avg": 1e-06, "count": 8, - "max": 5e-06, + "max": 4e-06, "min": 0.0, - "total": 7e-06 + "total": 8e-06 }, "sirius::K_point::generate_gkvec": { - "avg": 0.00031662499999999996, + "avg": 0.000243875, "count": 8, - "max": 0.000457, - "min": 0.0002, - "total": 0.0025329999999999997 + "max": 0.000416, + "min": 0.000143, + "total": 0.001951 }, "sirius::K_point::initialize": { - "avg": 
0.0008957500000000001, + "avg": 0.0006168750000000001, "count": 8, - "max": 0.001273, - "min": 0.000674, - "total": 0.0071660000000000005 + "max": 0.000989, + "min": 0.000375, + "total": 0.004935000000000001 }, "sirius::K_point::update": { - "avg": 0.0003441249999999999, + "avg": 0.000349, "count": 8, - "max": 0.000406, - "min": 0.00025, - "total": 0.0027529999999999994 + "max": 0.000542, + "min": 0.000207, + "total": 0.002792 }, "sirius::K_point_set::add_kpoint": { - "avg": 4.125e-06, + "avg": 3.750000000000001e-06, "count": 8, - "max": 1.6e-05, + "max": 1.5e-05, "min": 2e-06, - "total": 3.3e-05 + "total": 3.0000000000000008e-05 }, "sirius::K_point_set::create_k_mesh": { - "avg": 0.021561, + "avg": 0.03291, "count": 1, - "max": 0.021561, - "min": 0.021561, - "total": 0.021561 + "max": 0.03291, + "min": 0.03291, + "total": 0.03291 }, "sirius::K_point_set::find_band_occupancies": { - "avg": 0.000504952380952381, - "count": 21, - "max": 0.000994, - "min": 1e-05, - "total": 0.010604 + "avg": 0.0005033461538461539, + "count": 26, + "max": 0.000738, + "min": 1.2e-05, + "total": 0.013087000000000001 }, "sirius::K_point_set::initialize": { - "avg": 0.007449, + "avg": 0.005077, "count": 1, - "max": 0.007449, - "min": 0.007449, - "total": 0.007449 + "max": 0.005077, + "min": 0.005077, + "total": 0.005077 }, "sirius::K_point_set::sync_band_energies": { - "avg": 1.4857142857142857e-05, - "count": 21, - "max": 5.2e-05, - "min": 9e-06, - "total": 0.000312 + "avg": 6.307692307692309e-06, + "count": 26, + "max": 1.1e-05, + "min": 5e-06, + "total": 0.00016400000000000003 }, "sirius::Local_operator::apply_h": { - "avg": 0.07476950766283529, - "count": 522, - "max": 0.137215, - "min": 0.002701, - "total": 39.02968300000002 + "avg": 0.036739941082802534, + "count": 628, + "max": 0.061175, + "min": 0.001242, + "total": 23.07268299999999 }, "sirius::Local_operator::prepare": { - "avg": 0.00043934343434343337, - "count": 198, - "max": 0.005385, + "avg": 0.00011822222222222208, + 
"count": 243, + "max": 0.001117, "min": 7e-06, - "total": 0.0869899999999998 + "total": 0.028727999999999965 }, "sirius::Non_local_operator::Non_local_operator": { - "avg": 8.636363636363635e-07, - "count": 44, + "avg": 1.0925925925925925e-06, + "count": 54, "max": 2e-06, "min": 0.0, - "total": 3.7999999999999995e-05 + "total": 5.899999999999999e-05 }, "sirius::Non_local_operator::apply": { - "avg": 0.0002278393997445721, - "count": 3132, - "max": 0.000726, - "min": 4.1e-05, - "total": 0.7135929999999998 + "avg": 8.477016985138036e-05, + "count": 3768, + "max": 0.000457, + "min": 4.9e-05, + "total": 0.3194140000000012 }, "sirius::Periodic_function::add": { - "avg": 6.940909090909091e-05, - "count": 44, - "max": 0.000155, - "min": 4.9e-05, - "total": 0.003054 + "avg": 6.91851851851852e-05, + "count": 54, + "max": 0.000107, + "min": 5.4e-05, + "total": 0.0037360000000000006 }, "sirius::Periodic_function::inner": { - "avg": 6.497018348623851e-05, - "count": 436, - "max": 0.000192, - "min": 4.8e-05, - "total": 0.02832699999999999 + "avg": 7.234758364312265e-05, + "count": 538, + "max": 0.000127, + "min": 5.4e-05, + "total": 0.038922999999999985 }, "sirius::Periodic_function::integrate": { - "avg": 5.656470588235293e-05, - "count": 85, - "max": 8.1e-05, - "min": 4.8e-05, - "total": 0.004807999999999999 + "avg": 6.676190476190481e-05, + "count": 105, + "max": 0.000106, + "min": 5.2e-05, + "total": 0.007010000000000004 }, "sirius::Potential::Potential": { - "avg": 0.044804, + "avg": 0.00497, "count": 1, - "max": 0.044804, - "min": 0.044804, - "total": 0.044804 + "max": 0.00497, + "min": 0.00497, + "total": 0.00497 }, "sirius::Potential::generate": { - "avg": 0.15403063636363637, - "count": 22, - "max": 0.178269, - "min": 0.148201, - "total": 3.388674 + "avg": 0.04746707407407408, + "count": 27, + "max": 0.058739, + "min": 0.04318, + "total": 1.281611 }, "sirius::Potential::generate_D_operator_matrix": { - "avg": 0.015514090909090906, - "count": 22, - "max": 0.020451, - 
"min": 0.013948, - "total": 0.34130999999999995 + "avg": 0.006026888888888889, + "count": 27, + "max": 0.007259, + "min": 0.005441, + "total": 0.162726 }, "sirius::Potential::generate_PAW_effective_potential": { - "avg": 1.181818181818182e-06, - "count": 22, - "max": 1.4e-05, + "avg": 4.4444444444444454e-07, + "count": 27, + "max": 1e-06, "min": 0.0, - "total": 2.6000000000000002e-05 + "total": 1.2000000000000002e-05 }, "sirius::Potential::generate_local_potential": { - "avg": 0.043232, + "avg": 0.001596, "count": 1, - "max": 0.043232, - "min": 0.043232, - "total": 0.043232 + "max": 0.001596, + "min": 0.001596, + "total": 0.001596 }, "sirius::Potential::poisson": { - "avg": 0.04473554545454545, - "count": 22, - "max": 0.050452, - "min": 0.043564, - "total": 0.9841819999999999 + "avg": 0.0007348518518518519, + "count": 27, + "max": 0.000981, + "min": 0.000595, + "total": 0.019841 }, "sirius::Potential::update": { - "avg": 0.043479, + "avg": 0.001611, "count": 1, - "max": 0.043479, - "min": 0.043479, - "total": 0.043479 + "max": 0.001611, + "min": 0.001611, + "total": 0.001611 }, "sirius::Potential::xc": { - "avg": 0.08298681818181818, - "count": 22, - "max": 0.096714, - "min": 0.078752, - "total": 1.82571 + "avg": 0.03873048148148148, + "count": 27, + "max": 0.048404, + "min": 0.035123, + "total": 1.045723 }, "sirius::Potential::xc_rg_magnetic": { - "avg": 0.08298100000000001, - "count": 22, - "max": 0.096707, - "min": 0.078748, - "total": 1.8255820000000003 + "avg": 0.03872425925925926, + "count": 27, + "max": 0.048396, + "min": 0.035117, + "total": 1.045555 }, "sirius::Potential::xc_rg_magnetic|grad1": { - "avg": 0.028945681818181822, - "count": 22, - "max": 0.031066, - "min": 0.026958, - "total": 0.6368050000000001 + "avg": 0.010490481481481483, + "count": 27, + "max": 0.01235, + "min": 0.008872, + "total": 0.283243 }, "sirius::Potential::xc_rg_magnetic|grad2": { - "avg": 0.039553090909090906, - "count": 22, - "max": 0.045649, - "min": 0.036246, - "total": 
0.8701679999999999 + "avg": 0.014416629629629628, + "count": 27, + "max": 0.018941, + "min": 0.013016, + "total": 0.38924899999999996 }, "sirius::Potential::xc_rg_magnetic|libxc": { - "avg": 0.010329136363636362, - "count": 22, - "max": 0.015998, - "min": 0.00736, - "total": 0.22724099999999997 + "avg": 0.004754629629629631, + "count": 27, + "max": 0.00615, + "min": 0.002731, + "total": 0.12837500000000004 }, "sirius::Potential::xc_rg_magnetic|up_dn": { - "avg": 0.0005264090909090908, - "count": 22, - "max": 0.000998, - "min": 0.000479, - "total": 0.011581 + "avg": 0.0005578888888888889, + "count": 27, + "max": 0.000846, + "min": 0.000451, + "total": 0.015063 }, "sirius::Radial_integrals|atomic_centered_wfc": { - "avg": 0.0866285, + "avg": 0.09750500000000001, "count": 2, - "max": 0.087847, - "min": 0.08541, - "total": 0.173257 + "max": 0.106908, + "min": 0.088102, + "total": 0.19501000000000002 }, "sirius::Radial_integrals|aug": { - "avg": 0.44958200000000004, + "avg": 0.47775, "count": 2, - "max": 0.51239, - "min": 0.386774, - "total": 0.8991640000000001 + "max": 0.533911, + "min": 0.421589, + "total": 0.9555 }, "sirius::Radial_integrals|beta": { - "avg": 0.0806605, + "avg": 0.0846475, "count": 2, - "max": 0.085445, - "min": 0.075876, - "total": 0.161321 + "max": 0.087398, + "min": 0.081897, + "total": 0.169295 }, "sirius::Radial_integrals|rho_core_pseudo": { - "avg": 0.024008500000000002, + "avg": 0.024718, "count": 2, - "max": 0.027949, - "min": 0.020068, - "total": 0.048017000000000004 + "max": 0.02676, + "min": 0.022676, + "total": 0.049436 }, "sirius::Radial_integrals|rho_pseudo": { - "avg": 0.01935, + "avg": 0.020838, "count": 1, - "max": 0.01935, - "min": 0.01935, - "total": 0.01935 + "max": 0.020838, + "min": 0.020838, + "total": 0.020838 }, "sirius::Radial_integrals|vloc": { - "avg": 0.0708805, + "avg": 0.070633, "count": 2, - "max": 0.078384, - "min": 0.063377, - "total": 0.141761 + "max": 0.076832, + "min": 0.064434, + "total": 0.141266 }, 
"sirius::Simulation_context::init_atoms_to_grid_idx": { - "avg": 0.004354, + "avg": 0.001429, "count": 1, - "max": 0.004354, - "min": 0.004354, - "total": 0.004354 + "max": 0.001429, + "min": 0.001429, + "total": 0.001429 }, "sirius::Simulation_context::init_comm": { - "avg": 0.000544, + "avg": 0.000246, "count": 1, - "max": 0.000544, - "min": 0.000544, - "total": 0.000544 + "max": 0.000246, + "min": 0.000246, + "total": 0.000246 }, "sirius::Simulation_context::init_fft": { - "avg": 0.014965, + "avg": 0.012589, "count": 1, - "max": 0.014965, - "min": 0.014965, - "total": 0.014965 + "max": 0.012589, + "min": 0.012589, + "total": 0.012589 }, "sirius::Simulation_context::initialize": { - "avg": 1.634293, + "avg": 1.699048, "count": 1, - "max": 1.634293, - "min": 1.634293, - "total": 1.634293 + "max": 1.699048, + "min": 1.699048, + "total": 1.699048 }, "sirius::Simulation_context::make_periodic_function": { - "avg": 0.04007733333333333, - "count": 3, - "max": 0.041187, - "min": 0.038589, - "total": 0.120232 + "avg": 0.00024716666666666664, + "count": 6, + "max": 0.000393, + "min": 0.000159, + "total": 0.001483 }, "sirius::Simulation_context::update": { - "avg": 0.139113, + "avg": 0.099655, "count": 1, - "max": 0.139113, - "min": 0.139113, - "total": 0.139113 + "max": 0.099655, + "min": 0.099655, + "total": 0.099655 }, "sirius::Simulation_parameters::import": { - "avg": 0.001095, + "avg": 0.000186, "count": 1, - "max": 0.001095, - "min": 0.001095, - "total": 0.001095 + "max": 0.000186, + "min": 0.000186, + "total": 0.000186 }, "sirius::Smooth_periodic_function::fft_transform": { - "avg": 0.0023548384697130715, - "count": 941, - "max": 0.005357, - "min": 0.000446, - "total": 2.2159030000000004 + "avg": 0.00046733475661827527, + "count": 1171, + "max": 0.001254, + "min": 0.000127, + "total": 0.5472490000000003 }, "sirius::Smooth_periodic_function::gather_f_pw": { - "avg": 4.625e-05, + "avg": 2.3125e-05, "count": 8, - "max": 0.000123, - "min": 2.9e-05, - "total": 0.00037 - 
}, - "sirius::Smooth_periodic_function_gradient|dot": { - "avg": 0.00040437662337662324, - "count": 154, - "max": 0.000593, - "min": 0.000288, - "total": 0.062273999999999975 - }, - "sirius::Smooth_periodic_function_gradient|gradient": { - "avg": 0.0004025818181818181, - "count": 110, - "max": 0.000723, - "min": 0.000144, - "total": 0.04428399999999999 - }, - "sirius::Smooth_periodic_function_gradient|laplacian": { - "avg": 0.000244840909090909, - "count": 44, - "max": 0.000324, - "min": 0.000212, - "total": 0.010772999999999996 + "max": 3.3e-05, + "min": 1.7e-05, + "total": 0.000185 }, "sirius::Smooth_periodic_function|inner": { - "avg": 6.277445109780441e-05, - "count": 501, - "max": 0.000188, - "min": 4.7e-05, - "total": 0.031450000000000006 + "avg": 6.984652665589646e-05, + "count": 619, + "max": 0.000125, + "min": 5.2e-05, + "total": 0.043234999999999905 + }, + "sirius::Stress|ewald": { + "avg": 0.000858, + "count": 1, + "max": 0.000858, + "min": 0.000858, + "total": 0.000858 + }, + "sirius::Stress|har": { + "avg": 0.000281, + "count": 1, + "max": 0.000281, + "min": 0.000281, + "total": 0.000281 + }, + "sirius::Stress|kin": { + "avg": 0.001601, + "count": 1, + "max": 0.001601, + "min": 0.001601, + "total": 0.001601 + }, + "sirius::Stress|nonloc": { + "avg": 0.051411, + "count": 1, + "max": 0.051411, + "min": 0.051411, + "total": 0.051411 + }, + "sirius::Stress|us": { + "avg": 0.67746, + "count": 1, + "max": 0.67746, + "min": 0.67746, + "total": 0.67746 + }, + "sirius::Stress|us|gemm": { + "avg": 0.0010714166666666667, + "count": 36, + "max": 0.001527, + "min": 0.000939, + "total": 0.038571 + }, + "sirius::Stress|us|phase_fac": { + "avg": 0.00015, + "count": 1, + "max": 0.00015, + "min": 0.00015, + "total": 0.00015 + }, + "sirius::Stress|us|prepare": { + "avg": 0.00012480555555555557, + "count": 36, + "max": 0.000433, + "min": 9.3e-05, + "total": 0.0044930000000000005 + }, + "sirius::Stress|vloc": { + "avg": 0.001065, + "count": 1, + "max": 0.001065, + "min": 
0.001065, + "total": 0.001065 }, "sirius::Unit_cell::find_nearest_neighbours": { - "avg": 0.00025699999999999996, + "avg": 0.000212, "count": 2, - "max": 0.000396, - "min": 0.000118, - "total": 0.0005139999999999999 + "max": 0.000284, + "min": 0.00014, + "total": 0.000424 }, "sirius::Unit_cell::get_symmetry": { - "avg": 0.013666500000000002, + "avg": 0.028816, "count": 2, - "max": 0.015789, - "min": 0.011544, - "total": 0.027333000000000003 + "max": 0.02905, + "min": 0.028582, + "total": 0.057632 }, "sirius::Unit_cell::initialize": { - "avg": 0.035371, + "avg": 0.054104, "count": 1, - "max": 0.035371, - "min": 0.035371, - "total": 0.035371 + "max": 0.054104, + "min": 0.054104, + "total": 0.054104 }, "sirius::Unit_cell::update": { - "avg": 0.0139395, + "avg": 0.0290375, "count": 2, - "max": 0.015923, - "min": 0.011956, - "total": 0.027879 + "max": 0.029199, + "min": 0.028876, + "total": 0.058075 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { - "avg": 0.013635500000000002, + "avg": 0.0287835, "count": 2, - "max": 0.015741, - "min": 0.01153, - "total": 0.027271000000000004 + "max": 0.029007, + "min": 0.02856, + "total": 0.057567 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { - "avg": 0.013453, + "avg": 0.0285465, "count": 2, - "max": 0.015515, - "min": 0.011391, - "total": 0.026906 + "max": 0.028793, + "min": 0.0283, + "total": 0.057093 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { - "avg": 5.95e-05, + "avg": 4.4999999999999996e-05, "count": 2, - "max": 6.6e-05, - "min": 5.3e-05, - "total": 0.000119 + "max": 6.1e-05, + "min": 2.9e-05, + "total": 8.999999999999999e-05 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { - "avg": 9.15e-05, + "avg": 0.00016350000000000002, "count": 2, - "max": 0.000132, - "min": 5.1e-05, - "total": 0.000183 + "max": 0.000176, + "min": 0.000151, + "total": 0.00032700000000000003 }, "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { - "avg": 1.6e-05, + "avg": 1.65e-05, "count": 2, - "max": 
2.2e-05, + "max": 2.3e-05, "min": 1e-05, - "total": 3.2e-05 + "total": 3.3e-05 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw": { - "avg": 0.005162761904761904, - "count": 42, - "max": 0.006266, - "min": 0.004561, - "total": 0.21683599999999997 + "avg": 0.003077461538461539, + "count": 52, + "max": 0.006804, + "min": 0.002648, + "total": 0.16002800000000003 }, "sirius::Unit_cell_symmetry::symmetrize_function_pw|local": { - "avg": 0.0026809999999999994, - "count": 42, - "max": 0.003013, - "min": 0.002367, - "total": 0.11260199999999998 + "avg": 0.0026438461538461537, + "count": 52, + "max": 0.006216, + "min": 0.00227, + "total": 0.13748 }, "sirius::Unit_cell_symmetry::symmetrize_vector_function_pw_3c": { - "avg": 0.010895404761904762, - "count": 42, - "max": 0.012714, - "min": 0.009344, - "total": 0.457607 + "avg": 0.004113865384615385, + "count": 52, + "max": 0.005954, + "min": 0.003602, + "total": 0.21392100000000003 + }, + "sirius::dot": { + "avg": 0.0006011481481481481, + "count": 189, + "max": 0.000921, + "min": 0.000503, + "total": 0.11361699999999998 + }, + "sirius::gradient": { + "avg": 0.0010857205882352937, + "count": 136, + "max": 0.001761, + "min": 0.000212, + "total": 0.14765799999999993 + }, + "sirius::laplacian": { + "avg": 0.0005723333333333332, + "count": 54, + "max": 0.000702, + "min": 0.000482, + "total": 0.030905999999999993 } } } \ No newline at end of file diff --git a/verification/test9/sirius.json b/verification/test9/sirius.json index 5acd20c1a..a8c99d42c 100644 --- a/verification/test9/sirius.json +++ b/verification/test9/sirius.json @@ -4,7 +4,9 @@ "processing_unit" : "cpu", "std_evp_solver_type" : "lapack", "gen_evp_solver_type" : "lapack", - "verbosity" : 1 + "verbosity" : 1, + "print_forces" : true, + "print_stress" : true }, "parameters" : { From 55f235f633adb50dcd411f88106e79cf49b24abf Mon Sep 17 00:00:00 2001 From: toxa81 Date: Mon, 18 Mar 2019 23:47:17 +0100 Subject: [PATCH 24/28] cleanup of the mixer implementation --- 
src/mixer.hpp | 146 +++++++++++++++++++++----------------------------- 1 file changed, 62 insertions(+), 84 deletions(-) diff --git a/src/mixer.hpp b/src/mixer.hpp index 396198c28..d6ff95330 100644 --- a/src/mixer.hpp +++ b/src/mixer.hpp @@ -28,9 +28,9 @@ namespace sirius { -/// Abstract mixer +/// Abstract mixer. template -class Mixer // TODO: review mixer implementation, it's too obscure +class Mixer { protected: /// Size of the vector which is global to (in other words, shared between) all MPI ranks. @@ -39,14 +39,8 @@ class Mixer // TODO: review mixer implementation, it's too obscure /// Size of the vector which is local to MPI rank. int local_vector_size_; - /// Split shared vector size beteen all MPI ranks. - splindex spl_shared_size_; - - /// Local size of shared vector. - int spl_shared_local_size_{0}; - /// Local number of vector elements. - /** The local number of elements is a sum of local vector size and local size of shared vector. */ + /** The local number of elements is a sum of local vector size and shared vector size. */ int local_size_; /// Total number of vector elements. @@ -72,8 +66,10 @@ class Mixer // TODO: review mixer implementation, it's too obscure /// History of previous vectors. mdarray vectors_; - /// Output buffer for the shared (global) part of the vector. - mdarray output_buffer_; + /// Residuals of the input andvectors + mdarray residuals_; + + mdarray local_weight_; /// Base communicator. Communicator const& comm_; @@ -93,12 +89,13 @@ class Mixer // TODO: review mixer implementation, it's too obscure /// Compute RMS deviation between current vector and input vector. 
double rms_deviation() const { - double rms{0}; int ipos = idx_hist(count_); + double rms{0}; + #pragma omp parallel for schedule(static) reduction(+:rms) for (int i = 0; i < local_size_; i++) { - rms += std::pow(std::abs(vectors_(i, ipos) - input_buffer_(i)), 2); + rms += local_weight_[i] * std::pow(std::abs(vectors_(i, ipos) - input_buffer_(i)), 2); } comm_.allreduce(&rms, 1); @@ -106,6 +103,23 @@ class Mixer // TODO: review mixer implementation, it's too obscure return rms; } + /// Compute residual and residual square sum. + void compute_rss() + { + /* current position in history */ + int ipos = this->idx_hist(this->count_); + + double rss{0}; + + #pragma omp parallel for schedule(static) reduction(+:rss) + for (int i = 0; i < local_size_; i++) { + residuals_(i, ipos) = this->input_buffer_(i) - this->vectors_(i, ipos); + rss += local_weight_[i] * std::pow(std::abs(residuals_(i, ipos)), 2) * this->weights_(i); + } + this->comm_.allreduce(&rss, 1); + this->rss_ = rss; + } + /// Mix input buffer and previous vector and store result in the current vector. void mix_linear(double beta__) { @@ -116,11 +130,6 @@ class Mixer // TODO: review mixer implementation, it's too obscure for (int i = 0; i < local_size_; i++) { vectors_(i, ipos) = beta__ * input_buffer_(i) + (1 - beta__) * vectors_(i, ipos1); } - - T* ptr = (this->output_buffer_.size() == 0) ? 
nullptr : this->output_buffer_.template at(memory_t::host); - - /* collect shared data */ - comm_.allgather(&vectors_(0, ipos), ptr, spl_shared_size_.global_offset(), spl_shared_size_.local_size()); } public: @@ -137,22 +146,27 @@ class Mixer // TODO: review mixer implementation, it's too obscure size_t n = local_vector_size__; comm_.allreduce(&n, 1); + /* get the total size */ total_size_ = n + shared_vector_size_; - - spl_shared_size_ = splindex(shared_vector_size_, comm_.size(), comm_.rank()); - if (shared_vector_size_) { - spl_shared_local_size_ = spl_shared_size_.local_size(); - } - local_size_ = spl_shared_local_size_ + local_vector_size_; + /* get the local size */ + local_size_ = local_vector_size_ + shared_vector_size_; /* allocate input buffer */ input_buffer_ = mdarray(local_size_, memory_t::host, "Mixer::input_buffer_"); - /* allocate output bffer */ - output_buffer_ = mdarray(shared_vector_size_, memory_t::host, "Mixer::output_buffer_"); /* allocate storage for previous vectors */ vectors_ = mdarray(local_size_, max_history_, memory_t::host, "Mixer::vectors_"); /* allocate weights */ weights_ = mdarray(local_size_, memory_t::host, "Mixer::weights_"); weights_.zero(); + + residuals_ = mdarray(local_size_, max_history__); + + local_weight_ = mdarray(local_size_); + for (int i = 0; i < shared_vector_size_; i++) { + local_weight_[i] = 1.0 / comm_.size(); + } + for (int i = 0; i < local_vector_size_; i++) { + local_weight_[shared_vector_size_ + i] = 1.0; + } } virtual ~Mixer() @@ -161,32 +175,32 @@ class Mixer // TODO: review mixer implementation, it's too obscure void input_shared(int idx__, T value__, double w__ = 1.0) { + /* shared vector is first in the input buffer */ + assert(idx__ >= 0 && idx__ < shared_vector_size_); - auto offs_and_rank = spl_shared_size_.location(idx__); - if (offs_and_rank.rank == comm_.rank()) { - input_buffer_(offs_and_rank.local_index) = value__; - weights_(offs_and_rank.local_index) = w__; - } + input_buffer_(idx__) = 
value__; + weights_(idx__) = w__; } void input_local(int idx__, T value__, double w__ = 1.0) { assert(idx__ >= 0 && idx__ < local_vector_size_); - input_buffer_(spl_shared_local_size_ + idx__) = value__; - weights_(spl_shared_local_size_ + idx__) = w__; + input_buffer_(shared_vector_size_ + idx__) = value__; + weights_(shared_vector_size_ + idx__) = w__; } inline T output_shared(int idx) const { - return output_buffer_(idx); + int ipos = idx_hist(count_); + return vectors_(idx, ipos); } inline T output_local(int idx) const { int ipos = idx_hist(count_); - return vectors_(spl_shared_local_size_ + idx, ipos); + return vectors_(shared_vector_size_ + idx, ipos); } /// Initialize the mixer. @@ -245,8 +259,6 @@ class Broyden1 : public Mixer double beta0_; double beta_scaling_factor_; - mdarray residuals_; - public: Broyden1(int shared_vector_size__, int local_vector_size__, int max_history__, double beta__, double beta0__, double beta_scaling_factor__, Communicator const& comm__) @@ -254,7 +266,6 @@ class Broyden1 : public Mixer , beta0_(beta0__) , beta_scaling_factor_(beta_scaling_factor__) { - residuals_ = mdarray(this->local_size_, max_history__); } double mix(double rss_min__) @@ -265,28 +276,12 @@ class Broyden1 : public Mixer int ipos = this->idx_hist(this->count_); /* compute residual square sum */ - double rss{0}; - #pragma omp parallel for schedule(static) reduction(+:rss) - for (int i = 0; i < this->local_size_; i++) { - residuals_(i, ipos) = this->input_buffer_(i) - this->vectors_(i, ipos); - rss += std::pow(std::abs(residuals_(i, ipos)), 2) * this->weights_(i); - } - this->comm_.allreduce(&rss, 1); - this->rss_ = rss; + this->compute_rss(); /* exit if the vector has converged */ if (this->rss_ < rss_min__) { /* Warning: if the vector has converged to this degree, it will not be mixed; * the output buffer will contain the vector of the previous step */ - - // int i1 = this->idx_hist(this->count_); - ///* copy input to output */ - // for (int i = 0; i < 
this->local_size_; i++) { - // this->vectors_(i, i1) = this->input_buffer_(i); - //} - - // this->comm_.allgather(&this->vectors_(0, i1), this->output_buffer_.template at(memory_t::host), - // this->spl_shared_size_.global_offset(), this->spl_shared_size_.local_size()); return 0.0; } @@ -335,10 +330,10 @@ class Broyden1 : public Mixer double t{0}; #pragma omp parallel for schedule(static) reduction(+:t) for (int i = 0; i < this->local_size_; i++) { - T dr1 = residuals_(i, i1) - residuals_(i, i2); - T dr2 = residuals_(i, i3) - residuals_(i, i4); + T dr1 = this->residuals_(i, i1) - this->residuals_(i, i2); + T dr2 = this->residuals_(i, i3) - this->residuals_(i, i4); - t += std::real(std::conj(dr1) * dr2) * this->weights_(i); + t += std::real(std::conj(dr1) * dr2) * this->weights_(i) * this->local_weight_[i]; } S(j2, j1) = S(j1, j2) = t; } @@ -376,8 +371,9 @@ class Broyden1 : public Mixer double t{0}; #pragma omp parallel for schedule(static) reduction(+:t) for (int i = 0; i < this->local_size_; i++) { - T dr = residuals_(i, i1) - residuals_(i, i2); - t += std::real(std::conj(dr) * residuals_(i, ipos)) * this->weights_(i); + T dr = this->residuals_(i, i1) - this->residuals_(i, i2); + t += std::real(std::conj(dr) * this->residuals_(i, ipos)) * this->weights_(i) * + this->local_weight_[i]; } c(j) = t; } @@ -394,7 +390,7 @@ class Broyden1 : public Mixer #pragma omp parallel for schedule(static) for (int i = 0; i < this->local_size_; i++) { - T dr = residuals_(i, i1) - residuals_(i, i2); + T dr = this->residuals_(i, i1) - this->residuals_(i, i2); T dv = this->vectors_(i, i1) - this->vectors_(i, i2); this->input_buffer_(i) -= gamma * (dr * this->beta_ + dv); @@ -407,14 +403,9 @@ class Broyden1 : public Mixer #pragma omp parallel for schedule(static) for (int i = 0; i < this->local_size_; i++) { this->vectors_(i, i1) = - this->vectors_(i, ipos) + this->beta_ * residuals_(i, ipos) + this->input_buffer_(i); + this->vectors_(i, ipos) + this->beta_ * this->residuals_(i, ipos) + 
this->input_buffer_(i); } - T* ptr = (this->output_buffer_.size() == 0) ? nullptr : this->output_buffer_.template at(memory_t::host); - - this->comm_.allgather(&this->vectors_(0, i1), ptr, this->spl_shared_size_.global_offset(), - this->spl_shared_size_.local_size()); - /* increment the history step */ this->count_++; @@ -435,8 +426,6 @@ class Broyden2 : public Mixer double beta_scaling_factor_; double linear_mix_rms_tol_; - mdarray residuals_; - public: Broyden2(int shared_vector_size__, int local_vector_size__, int max_history__, double beta__, double beta0__, double linear_mix_rms_tol__, double beta_scaling_factor__, Communicator const& comm__) @@ -445,26 +434,14 @@ class Broyden2 : public Mixer , beta_scaling_factor_(beta_scaling_factor__) , linear_mix_rms_tol_(linear_mix_rms_tol__) { - residuals_ = mdarray(this->local_size_, max_history__); } double mix(double rss_min__) { PROFILE("sirius::Broyden2::mix"); - /* current position in history */ - int ipos = this->idx_hist(this->count_); - /* compute residual square sum */ - double rss{0}; - #pragma omp parallel for schedule(static) reduction(+:rss) - for (int i = 0; i < this->local_size_; i++) { - /* curent residual f_k = x_k - g(x_k) */ - residuals_(i, ipos) = this->vectors_(i, ipos) - this->input_buffer_(i); - rss += std::pow(std::abs(residuals_(i, ipos)), 2) * this->weights_(i); - } - this->comm_.allreduce(&rss, 1); - this->rss_ = rss; + this->compute_rss(); /* exit if the vector has converged */ if (this->rss_ < rss_min__) { @@ -516,7 +493,8 @@ class Broyden2 : public Mixer long double t{0}; #pragma omp parallel for schedule(static) reduction(+:t) for (int i = 0; i < this->local_size_; i++) { - t += std::real(std::conj(residuals_(i, i1)) * residuals_(i, i2)); + t += std::real(std::conj(this->residuals_(i, i1)) * this->residuals_(i, i2)) * + this->local_weight_[i]; } S(j2, j1) = S(j1, j2) = t; } @@ -577,7 +555,7 @@ class Broyden2 : public Mixer #pragma omp parallel for schedule(static) for (int i = 0; i < 
this->local_size_; i++) { this->input_buffer_(i) += - ((double)v2[j] * residuals_(i, i1) + (double)v2[j + N] * this->vectors_(i, i1)); + ((double)v2[j] * this->residuals_(i, i1) + (double)v2[j + N] * this->vectors_(i, i1)); } } /* mix last vector with the update vector \tilda x */ From c2e7e0fc50f03484208df810e4986609275d4318 Mon Sep 17 00:00:00 2001 From: toxa81 Date: Mon, 18 Mar 2019 23:50:11 +0100 Subject: [PATCH 25/28] add an option to repeat update() several times --- apps/dft_loop/sirius.scf.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/apps/dft_loop/sirius.scf.cpp b/apps/dft_loop/sirius.scf.cpp index 0b72b3f74..48bc642f1 100644 --- a/apps/dft_loop/sirius.scf.cpp +++ b/apps/dft_loop/sirius.scf.cpp @@ -41,15 +41,6 @@ std::unique_ptr create_sim_ctx(std::string fname__, auto gen_evp_solver_name = args__.value("gen_evp_solver_name", ctx.control().gen_evp_solver_name_); ctx.gen_evp_solver_name(gen_evp_solver_name); -// auto pu = args__.value("processing_unit", ctx.control().processing_unit_); -// if (pu == "") { -//#ifdef __GPU -// pu = "gpu"; -//#else -// pu = "cpu"; -//#endif -// } -// ctx.set_processing_unit(pu); ctx.import(args__); return std::move(ctx_ptr); @@ -97,6 +88,14 @@ double ground_state(Simulation_context& ctx, /* launch the calculation */ auto result = dft.find(inp.potential_tol_, inp.energy_tol_, inp.num_dft_iter_, write_state); + auto repeat_update = args.value("repeat_update", 0); + if (repeat_update) { + for (int i = 0; i < repeat_update; i++) { + dft.update(); + result = dft.find(inp.potential_tol_, inp.energy_tol_, inp.num_dft_iter_, write_state); + } + } + dft.print_magnetic_moment(); if (!ctx.full_potential()) { @@ -146,8 +145,8 @@ double ground_state(Simulation_context& ctx, } if (diff > 1e-6) { printf("total stress is different!"); - //std::cout << " reference: " << dict_ref["ground_state"]["stress"] << "\n"; - //std::cout << " computed: " << result["stress"] << "\n"; + std::cout << " 
reference: " << dict_ref["ground_state"]["stress"] << "\n"; + std::cout << " computed: " << result["stress"] << "\n"; ctx.comm().abort(2); } } @@ -162,8 +161,8 @@ double ground_state(Simulation_context& ctx, } if (diff > 1e-6) { printf("total force is different!"); - //std::cout << " reference: " << dict_ref["ground_state"]["stress"] << "\n"; - //std::cout << " computed: " << result["stress"] << "\n"; + std::cout << " reference: " << dict_ref["ground_state"]["forces"] << "\n"; + std::cout << " computed: " << result["forces"] << "\n"; ctx.comm().abort(3); } } @@ -357,6 +356,7 @@ int main(int argn, char** argv) args.register_key("--std_evp_solver_name=", "{string} standard eigen-value solver"); args.register_key("--gen_evp_solver_name=", "{string} generalized eigen-value solver"); args.register_key("--processing_unit=", "{string} type of the processing unit"); + args.register_key("--repeat_update=", "{int} number of times to repeat update()"); args.register_key("--control.processing_unit=", ""); args.register_key("--control.mpi_grid_dims=",""); args.register_key("--control.std_evp_solver_name=", ""); From 1e0d1365b1b4c78a33fa861bfac9b9824ae5d823 Mon Sep 17 00:00:00 2001 From: toxa81 Date: Tue, 19 Mar 2019 00:09:06 +0100 Subject: [PATCH 26/28] version update --- VERSION | 2 +- src/constants.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index dfda3e0b4..88d06f108 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.1.0 +6.1.3 diff --git a/src/constants.hpp b/src/constants.hpp index e8c0483f4..8eeacd135 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -29,7 +29,7 @@ const int major_version = 6; const int minor_version = 1; -const int revision = 0; +const int revision = 3; /// NIST value for the inverse fine structure (http://physics.nist.gov/cuu/Constants/index.html) const double speed_of_light = 137.035999139; From c97be8d90dabb03ffc069df3b8b0dba5a28ee914 Mon Sep 17 00:00:00 2001 From: toxa81 Date: Tue, 19 Mar 2019 
00:09:40 +0100 Subject: [PATCH 27/28] remove high-frequency mixer --- src/Density/density.hpp | 99 ++++++++++------------------------------- 1 file changed, 24 insertions(+), 75 deletions(-) diff --git a/src/Density/density.hpp b/src/Density/density.hpp index c53cc112d..4812a9f25 100644 --- a/src/Density/density.hpp +++ b/src/Density/density.hpp @@ -165,20 +165,17 @@ class Density : public Field4D /// Fast mapping between composite lm index and corresponding orbital quantum number. std::vector l_by_lm_; - /// High-frequency mixer for the pseudopotential density mixing. - std::unique_ptr> hf_mixer_{nullptr}; - /// Low-frequency mixer for the pseudopotential density mixing. - std::unique_ptr> lf_mixer_{nullptr}; + std::unique_ptr> mixer_{nullptr}; /// List of local low-fequency G-vectors. - std::vector lf_gvec_; + //std::vector lf_gvec_; /// List of local high-fequency G-vectors. - std::vector hf_gvec_; + //std::vector hf_gvec_; - /// Weights of local low-frequency G-vectors. - std::vector lf_gvec_weights_; + /// Weights of G-vectors for the mixer + std::vector gvec_mixer_weights_; /// Allocate PAW data. 
void init_paw(); @@ -341,15 +338,10 @@ class Density : public Field4D for (int igloc = 0; igloc < ctx_.gvec().count(); igloc++) { int ig = ctx_.gvec().offset() + igloc; auto gv = ctx_.gvec().gvec_cart(igloc); - if (gv.length() <= 2 * ctx_.gk_cutoff()) { - lf_gvec_.push_back(igloc); - if (ig) { - lf_gvec_weights_.push_back(fourpi * unit_cell_.omega() / std::pow(gv.length(), 2)); - } else { - lf_gvec_weights_.push_back(0); - } + if (ig) { + gvec_mixer_weights_.push_back(fourpi * unit_cell_.omega() / std::pow(gv.length(), 2)); } else { - hf_gvec_.push_back(igloc); + gvec_mixer_weights_.push_back(0); } } @@ -825,35 +817,21 @@ class Density : public Field4D if (ctx_.full_potential()) { Field4D::mixer_input(); } else { - int ld = static_cast(hf_gvec_.size()); - if (hf_mixer_) { - /* input high-frequency components */ - for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) { - for (int i = 0; i < static_cast(hf_gvec_.size()); i++) { - int igloc = hf_gvec_[i]; - hf_mixer_->input_local(i + j * ld, component(j).f_pw_local(igloc)); - } - } - } - - ld = static_cast(lf_gvec_.size()); - /* input low-frequency components */ + int ngv = ctx_.gvec().count(); for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) { if (j == 0) { - for (int i = 0; i < static_cast(lf_gvec_.size()); i++) { - int igloc = lf_gvec_[i]; - lf_mixer_->input_local(i + j * ld, component(j).f_pw_local(igloc), lf_gvec_weights_[i]); + for (int igloc = 0; igloc < ngv; igloc++) { + mixer_->input_local(igloc + j * ngv, component(j).f_pw_local(igloc), gvec_mixer_weights_[igloc]); } } else { - for (int i = 0; i < static_cast(lf_gvec_.size()); i++) { - int igloc = lf_gvec_[i]; - lf_mixer_->input_local(i + j * ld, component(j).f_pw_local(igloc)); + for (int igloc = 0; igloc < ngv; igloc++) { + mixer_->input_local(igloc + j * ngv, component(j).f_pw_local(igloc)); } } } /* input commonly shared data */ for (int i = 0; i < static_cast(density_matrix_.size()); i++) { - lf_mixer_->input_shared(i, density_matrix_[i], 0); + 
mixer_->input_shared(i, density_matrix_[i], 0); } } } @@ -863,28 +841,16 @@ class Density : public Field4D if (ctx_.full_potential()) { Field4D::mixer_output(); } else { - int ld = static_cast(hf_gvec_.size()); - if (hf_mixer_) { - /* get high-frequency components */ - for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) { - for (int i = 0; i < static_cast(hf_gvec_.size()); i++) { - int igloc = hf_gvec_[i]; - component(j).f_pw_local(igloc) = hf_mixer_->output_local(i + j * ld); - } - } - } - - ld = static_cast(lf_gvec_.size()); + int ngv = ctx_.gvec().count(); /* get low-frequency components */ for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) { - for (int i = 0; i < static_cast(lf_gvec_.size()); i++) { - int igloc = lf_gvec_[i]; - component(j).f_pw_local(igloc) = lf_mixer_->output_local(i + j * ld); + for (int igloc = 0; igloc < ngv; igloc++) { + component(j).f_pw_local(igloc) = mixer_->output_local(igloc + j * ngv); } } for (int i = 0; i < static_cast(density_matrix_.size()); i++) { - density_matrix_[i] = lf_mixer_->output_shared(i); + density_matrix_[i] = mixer_->output_shared(i); } } } @@ -894,25 +860,11 @@ class Density : public Field4D if (ctx_.full_potential()) { Field4D::mixer_init(mixer_cfg__); } else { - auto conf1 = mixer_cfg__; - conf1.type_ = "linear"; - if (hf_gvec_.size() > 0) { - hf_mixer_ = Mixer_factory(0, - static_cast(hf_gvec_.size() * (1 + ctx_.num_mag_dims())), - conf1, - ctx_.comm()); - } - - lf_mixer_ = Mixer_factory(static_cast(density_matrix_.size()), - static_cast(lf_gvec_.size() * (1 + ctx_.num_mag_dims())), - mixer_cfg__, - ctx_.comm()); + mixer_ = Mixer_factory(static_cast(density_matrix_.size()), + ctx_.gvec().count() * (1 + ctx_.num_mag_dims()), + mixer_cfg__, ctx_.comm()); mixer_input(); - lf_mixer_->initialize(); - - if (hf_mixer_) { - hf_mixer_->initialize(); - } + mixer_->initialize(); } } @@ -928,10 +880,7 @@ class Density : public Field4D } else { /* mix in G-space in case of PP */ mixer_input(); - rms = 
lf_mixer_->mix(ctx_.settings().mixer_rss_min_); - if (hf_mixer_) { - rms += hf_mixer_->mix(ctx_.settings().mixer_rss_min_); - } + rms = mixer_->mix(ctx_.settings().mixer_rss_min_); mixer_output(); } @@ -940,7 +889,7 @@ class Density : public Field4D inline double dr2() const { - return lf_mixer_->rss(); + return mixer_->rss(); } mdarray const& density_matrix() const From 48133c1ebd3816ccdd4574b2d478799ef19753c4 Mon Sep 17 00:00:00 2001 From: toxa81 Date: Tue, 19 Mar 2019 09:25:50 +0100 Subject: [PATCH 28/28] add missing file --- verification/test15/output_ref.json | 996 ++++++++++++++++++++++++++++ 1 file changed, 996 insertions(+) create mode 100644 verification/test15/output_ref.json diff --git a/verification/test15/output_ref.json b/verification/test15/output_ref.json new file mode 100644 index 000000000..081df862d --- /dev/null +++ b/verification/test15/output_ref.json @@ -0,0 +1,996 @@ +{ + "comm_world_size": 1, + "counters": { + "band_evp_work_count": 225.30251851851855, + "local_operator_num_applied": 321 + }, + "git_hash": "2cd5e90efd5de9fbd0ee14102eab1edc30c45f26", + "ground_state": { + "aw_cutoff": 7.0, + "band_gap": 0.27331487138192734, + "chemical_formula": "LiF", + "converged": true, + "core_leakage": 0.0, + "efermi": 0.254296875, + "energy": { + "bxc": 0.0, + "core_eval_sum": 0.0, + "enuc": 0.0, + "eval_sum": -3.4289693562512125, + "ewald": -20.45564436758157, + "exc": -7.174790106144441, + "kin": 17.80853466095492, + "total": -36.41566091233912, + "veff": -21.23750401720613, + "vha": 19.13246540053192, + "vxc": -8.693441913715134 + }, + "fft_coarse_grid": [24,24,24], + "fft_grid": [40,40,40], + "forces": [ + [0.002318048228951569,0.0017404280188636073,0.0012839027848552537], + [-0.0023362284022196977,-0.0017286511119847803,-0.0012283888606507004] + ], + "mpi_grid": [1,1], + "num_atoms": 2, + "num_bands": 15, + "num_fv_states": -1, + "num_scf_iterations": 10, + "omega": 110.0926613870496, + "pw_cutoff": 20.0, + "stress": [ + 
[-0.0023331549769679367,2.8997803889554818e-05,2.1459816155008105e-05], + [2.8997803889554818e-05,-0.002326051381734177,1.68443293725836e-05], + [2.1459816155008105e-05,1.68443293725836e-05,-0.002320853614806935] + ] + }, + "task": 0, + "threads_per_rank": 8, + "timers": { + "Eigensolver_lapack|dsyevr": { + "avg": 0.0004895294117647058, + "count": 17, + "max": 0.000755, + "min": 0.000193, + "total": 0.008322 + }, + "Eigensolver_lapack|dsygvx": { + "avg": 0.00018825000000000005, + "count": 12, + "max": 0.000379, + "min": 0.000146, + "total": 0.0022590000000000006 + }, + "sddk::FFT3D::FFT3D": { + "avg": 0.006501, + "count": 2, + "max": 0.008266, + "min": 0.004736, + "total": 0.013002 + }, + "sddk::FFT3D::prepare": { + "avg": 6.300000000000001e-05, + "count": 47, + "max": 0.000127, + "min": 4.6e-05, + "total": 0.0029610000000000005 + }, + "sddk::FFT3D::prepare|cpu": { + "avg": 5.874468085106383e-05, + "count": 47, + "max": 0.000116, + "min": 4.1e-05, + "total": 0.002761 + }, + "sddk::FFT3D::transform": { + "avg": 0.0002637575757575756, + "count": 495, + "max": 0.001111, + "min": 0.000116, + "total": 0.13055999999999993 + }, + "sddk::FFT3D::transform_xy": { + "avg": 0.0001355939393939393, + "count": 495, + "max": 0.000677, + "min": 5.6e-05, + "total": 0.06711899999999996 + }, + "sddk::FFT3D::transform_z": { + "avg": 7.573637515842828e-05, + "count": 789, + "max": 0.000425, + "min": 4e-05, + "total": 0.05975599999999992 + }, + "sddk::FFT3D::transform_z_serial": { + "avg": 7.313561470215456e-05, + "count": 789, + "max": 0.000409, + "min": 3.8e-05, + "total": 0.05770399999999994 + }, + "sddk::FFT3D::transform_z_serial|cpu": { + "avg": 7.027122940430923e-05, + "count": 789, + "max": 0.000404, + "min": 3.6e-05, + "total": 0.055443999999999986 + }, + "sddk::Gvec::find_gvec_shells": { + "avg": 0.00036666666666666667, + "count": 6, + "max": 0.000677, + "min": 0.000144, + "total": 0.0022 + }, + "sddk::Gvec::init": { + "avg": 0.0011996666666666668, + "count": 3, + "max": 
0.002576, + "min": 0.000269, + "total": 0.003599 + }, + "sddk::inner": { + "avg": 1.9786666666666675e-05, + "count": 75, + "max": 3.6e-05, + "min": 3e-06, + "total": 0.0014840000000000007 + }, + "sddk::inner|local": { + "avg": 1.7119999999999995e-05, + "count": 75, + "max": 3.2e-05, + "min": 1e-06, + "total": 0.0012839999999999998 + }, + "sddk::matrix_storage::matrix_storage": { + "avg": 1.1408450704225339e-06, + "count": 71, + "max": 5e-06, + "min": 0.0, + "total": 8.099999999999991e-05 + }, + "sddk::matrix_storage::remap_backward": { + "avg": 6.551724137931036e-07, + "count": 29, + "max": 1e-06, + "min": 0.0, + "total": 1.9000000000000004e-05 + }, + "sddk::matrix_storage::remap_forward": { + "avg": 3.6500000000000015e-06, + "count": 40, + "max": 7e-06, + "min": 2e-06, + "total": 0.00014600000000000005 + }, + "sddk::matrix_storage::set_num_extra": { + "avg": 8.840579710144918e-07, + "count": 69, + "max": 2e-06, + "min": 0.0, + "total": 6.099999999999994e-05 + }, + "sddk::orthogonalize": { + "avg": 0.00010241176470588236, + "count": 17, + "max": 0.000172, + "min": 4.1e-05, + "total": 0.0017410000000000001 + }, + "sddk::orthogonalize|tmtrx": { + "avg": 3.941176470588235e-06, + "count": 17, + "max": 1.9e-05, + "min": 1e-06, + "total": 6.699999999999999e-05 + }, + "sddk::orthogonalize|transform": { + "avg": 1.9882352941176468e-05, + "count": 17, + "max": 7.4e-05, + "min": 2e-06, + "total": 0.000338 + }, + "sddk::remap_gvec_to_shells|init": { + "avg": 0.001686, + "count": 1, + "max": 0.001686, + "min": 0.001686, + "total": 0.001686 + }, + "sddk::transform": { + "avg": 3.257692307692307e-05, + "count": 52, + "max": 7.6e-05, + "min": 1.4e-05, + "total": 0.0016939999999999998 + }, + "sddk::transform|init": { + "avg": 3.0961538461538447e-06, + "count": 52, + "max": 9e-06, + "min": 0.0, + "total": 0.00016099999999999993 + }, + "sddk::transform|local": { + "avg": 1.1916666666666664e-05, + "count": 108, + "max": 5.4e-05, + "min": 3e-06, + "total": 0.0012869999999999997 + }, + 
"sirius::Atom_type::init": { + "avg": 0.0190915, + "count": 2, + "max": 0.020638, + "min": 0.017545, + "total": 0.038183 + }, + "sirius::Augmentation_operator::generate_pw_coeffs": { + "avg": 0.0054645, + "count": 2, + "max": 0.005631, + "min": 0.005298, + "total": 0.010929 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs": { + "avg": 0.004215, + "count": 6, + "max": 0.005741, + "min": 0.003213, + "total": 0.02529 + }, + "sirius::Augmentation_operator_gvec_deriv::generate_pw_coeffs|qpw": { + "avg": 0.004199666666666667, + "count": 6, + "max": 0.005726, + "min": 0.003192, + "total": 0.025197999999999998 + }, + "sirius::Augmentation_operator_gvec_deriv|constructor": { + "avg": 0.007628, + "count": 1, + "max": 0.007628, + "min": 0.007628, + "total": 0.007628 + }, + "sirius::Band::diag_pseudo_potential_davidson": { + "avg": 0.012872272727272725, + "count": 11, + "max": 0.030398, + "min": 0.006205, + "total": 0.14159499999999997 + }, + "sirius::Band::diag_pseudo_potential_davidson|alloc": { + "avg": 2.0090909090909094e-05, + "count": 11, + "max": 2.7e-05, + "min": 1.8e-05, + "total": 0.00022100000000000003 + }, + "sirius::Band::diag_pseudo_potential_davidson|evp": { + "avg": 0.0003673928571428572, + "count": 28, + "max": 0.000759, + "min": 0.000149, + "total": 0.010287000000000001 + }, + "sirius::Band::diag_pseudo_potential_davidson|iter": { + "avg": 0.012535545454545454, + "count": 11, + "max": 0.030067, + "min": 0.005883, + "total": 0.13789099999999999 + }, + "sirius::Band::diag_pseudo_potential_davidson|update_phi": { + "avg": 3.266666666666666e-05, + "count": 12, + "max": 0.000104, + "min": 1.9e-05, + "total": 0.000392 + }, + "sirius::Band::initialize_subspace": { + "avg": 0.005981, + "count": 1, + "max": 0.005981, + "min": 0.005981, + "total": 0.005981 + }, + "sirius::Band::initialize_subspace|kp": { + "avg": 0.005373, + "count": 1, + "max": 0.005373, + "min": 0.005373, + "total": 0.005373 + }, + "sirius::Band::initialize_subspace|kp|wf": { + 
"avg": 0.000418, + "count": 1, + "max": 0.000418, + "min": 0.000418, + "total": 0.000418 + }, + "sirius::Band::residuals": { + "avg": 0.00024635714285714284, + "count": 28, + "max": 0.000367, + "min": 1e-06, + "total": 0.006898 + }, + "sirius::Band::residuals_aux": { + "avg": 0.00028447619047619046, + "count": 21, + "max": 0.000337, + "min": 0.000255, + "total": 0.005973999999999999 + }, + "sirius::Band::set_subspace_mtrx": { + "avg": 0.00012448780487804882, + "count": 41, + "max": 0.000245, + "min": 2e-05, + "total": 0.005104000000000001 + }, + "sirius::Band::solve": { + "avg": 0.013280545454545455, + "count": 11, + "max": 0.03084, + "min": 0.006629, + "total": 0.146086 + }, + "sirius::Beta_projectors::Beta_projectors": { + "avg": 0.000237, + "count": 1, + "max": 0.000237, + "min": 0.000237, + "total": 0.000237 + }, + "sirius::Beta_projectors::generate_pw_coefs_t": { + "avg": 0.000205, + "count": 1, + "max": 0.000205, + "min": 0.000205, + "total": 0.000205 + }, + "sirius::Beta_projectors_base::dismiss": { + "avg": 2.222222222222222e-07, + "count": 27, + "max": 1e-06, + "min": 0.0, + "total": 5.999999999999999e-06 + }, + "sirius::Beta_projectors_base::generate": { + "avg": 2.1692307692307693e-05, + "count": 13, + "max": 2.4e-05, + "min": 2e-05, + "total": 0.000282 + }, + "sirius::Beta_projectors_base::inner": { + "avg": 1.4203703703703711e-05, + "count": 54, + "max": 3.7e-05, + "min": 8e-06, + "total": 0.0007670000000000004 + }, + "sirius::Beta_projectors_base::prepare": { + "avg": 1.5e-06, + "count": 2, + "max": 2e-06, + "min": 1e-06, + "total": 3e-06 + }, + "sirius::Beta_projectors_strain_deriv::generate_pw_coefs_t": { + "avg": 0.000353, + "count": 1, + "max": 0.000353, + "min": 0.000353, + "total": 0.000353 + }, + "sirius::Broyden1::mix": { + "avg": 0.0010538181818181818, + "count": 11, + "max": 0.002588, + "min": 5.4e-05, + "total": 0.011592 + }, + "sirius::DFT_ground_state::ewald_energy": { + "avg": 0.000488, + "count": 1, + "max": 0.000488, + "min": 0.000488, 
+ "total": 0.000488 + }, + "sirius::DFT_ground_state::scf_loop": { + "avg": 0.387781, + "count": 1, + "max": 0.387781, + "min": 0.387781, + "total": 0.387781 + }, + "sirius::DFT_ground_state::scf_loop|iteration": { + "avg": 0.034808, + "count": 11, + "max": 0.051957, + "min": 0.026867, + "total": 0.382888 + }, + "sirius::Density::add_k_point_contribution_dm": { + "avg": 0.00014481818181818183, + "count": 11, + "max": 0.000172, + "min": 0.000128, + "total": 0.001593 + }, + "sirius::Density::add_k_point_contribution_rg": { + "avg": 0.0012289090909090908, + "count": 11, + "max": 0.001396, + "min": 0.001114, + "total": 0.013517999999999999 + }, + "sirius::Density::augment": { + "avg": 0.0012707272727272725, + "count": 11, + "max": 0.002427, + "min": 0.001096, + "total": 0.013977999999999997 + }, + "sirius::Density::generate": { + "avg": 0.0029582727272727274, + "count": 11, + "max": 0.004127, + "min": 0.00275, + "total": 0.032541 + }, + "sirius::Density::generate_pseudo_core_charge_density": { + "avg": 0.001152, + "count": 1, + "max": 0.001152, + "min": 0.001152, + "total": 0.001152 + }, + "sirius::Density::generate_rho_aug": { + "avg": 0.0012084545454545455, + "count": 11, + "max": 0.002368, + "min": 0.001038, + "total": 0.013293000000000001 + }, + "sirius::Density::generate_rho_aug|gemm": { + "avg": 0.00017209090909090912, + "count": 22, + "max": 0.001423, + "min": 9.7e-05, + "total": 0.0037860000000000007 + }, + "sirius::Density::generate_rho_aug|sum": { + "avg": 0.0002770454545454545, + "count": 22, + "max": 0.000428, + "min": 0.00026, + "total": 0.006095 + }, + "sirius::Density::generate_valence": { + "avg": 0.0029539090909090904, + "count": 11, + "max": 0.004119, + "min": 0.002746, + "total": 0.032492999999999994 + }, + "sirius::Density::initial_density": { + "avg": 0.003138, + "count": 1, + "max": 0.003138, + "min": 0.003138, + "total": 0.003138 + }, + "sirius::Density::symmetrize_density_matrix": { + "avg": 1.9909090909090913e-05, + "count": 11, + "max": 
3.9e-05, + "min": 1.7e-05, + "total": 0.00021900000000000004 + }, + "sirius::Density::update": { + "avg": 0.001166, + "count": 1, + "max": 0.001166, + "min": 0.001166, + "total": 0.001166 + }, + "sirius::Field4D::symmetrize": { + "avg": 4.5454545454545457e-07, + "count": 22, + "max": 1e-06, + "min": 0.0, + "total": 1e-05 + }, + "sirius::Force::calc_forces_core": { + "avg": 0.00093, + "count": 1, + "max": 0.00093, + "min": 0.00093, + "total": 0.00093 + }, + "sirius::Force::calc_forces_ewald": { + "avg": 0.000758, + "count": 1, + "max": 0.000758, + "min": 0.000758, + "total": 0.000758 + }, + "sirius::Force::calc_forces_nonloc": { + "avg": 0.000517, + "count": 1, + "max": 0.000517, + "min": 0.000517, + "total": 0.000517 + }, + "sirius::Force::calc_forces_scf_corr": { + "avg": 0.000376, + "count": 1, + "max": 0.000376, + "min": 0.000376, + "total": 0.000376 + }, + "sirius::Force::calc_forces_us": { + "avg": 0.003264, + "count": 1, + "max": 0.003264, + "min": 0.003264, + "total": 0.003264 + }, + "sirius::Force::calc_forces_vloc": { + "avg": 0.000543, + "count": 1, + "max": 0.000543, + "min": 0.000543, + "total": 0.000543 + }, + "sirius::Hamiltonian::apply_h_s": { + "avg": 0.004040758620689654, + "count": 29, + "max": 0.005759, + "min": 0.000535, + "total": 0.11718199999999998 + }, + "sirius::Hamiltonian::get_h_diag": { + "avg": 0.0001440909090909091, + "count": 11, + "max": 0.000161, + "min": 0.000128, + "total": 0.001585 + }, + "sirius::Hamiltonian::get_o_diag": { + "avg": 0.00014599999999999997, + "count": 11, + "max": 0.000174, + "min": 0.000136, + "total": 0.0016059999999999998 + }, + "sirius::K_point::K_point": { + "avg": 4e-06, + "count": 1, + "max": 4e-06, + "min": 4e-06, + "total": 4e-06 + }, + "sirius::K_point::generate_gkvec": { + "avg": 0.000297, + "count": 1, + "max": 0.000297, + "min": 0.000297, + "total": 0.000297 + }, + "sirius::K_point::initialize": { + "avg": 0.000756, + "count": 1, + "max": 0.000756, + "min": 0.000756, + "total": 0.000756 + }, + 
"sirius::K_point::update": { + "avg": 0.000434, + "count": 1, + "max": 0.000434, + "min": 0.000434, + "total": 0.000434 + }, + "sirius::K_point_set::add_kpoint": { + "avg": 1.3e-05, + "count": 1, + "max": 1.3e-05, + "min": 1.3e-05, + "total": 1.3e-05 + }, + "sirius::K_point_set::create_k_mesh": { + "avg": 0.002216, + "count": 1, + "max": 0.002216, + "min": 0.002216, + "total": 0.002216 + }, + "sirius::K_point_set::find_band_occupancies": { + "avg": 6.818181818181819e-06, + "count": 11, + "max": 1.2e-05, + "min": 3e-06, + "total": 7.500000000000001e-05 + }, + "sirius::K_point_set::initialize": { + "avg": 0.000766, + "count": 1, + "max": 0.000766, + "min": 0.000766, + "total": 0.000766 + }, + "sirius::K_point_set::sync_band_energies": { + "avg": 2.7272727272727276e-06, + "count": 11, + "max": 4e-06, + "min": 2e-06, + "total": 3.0000000000000004e-05 + }, + "sirius::Local_operator::apply_h": { + "avg": 0.0038742068965517238, + "count": 29, + "max": 0.005572, + "min": 0.000397, + "total": 0.112352 + }, + "sirius::Local_operator::prepare": { + "avg": 0.00016995833333333335, + "count": 24, + "max": 0.000565, + "min": 6e-06, + "total": 0.004079 + }, + "sirius::Non_local_operator::Non_local_operator": { + "avg": 9.166666666666667e-07, + "count": 24, + "max": 2e-06, + "min": 0.0, + "total": 2.2000000000000003e-05 + }, + "sirius::Non_local_operator::apply": { + "avg": 6.455172413793103e-05, + "count": 58, + "max": 9.3e-05, + "min": 4.7e-05, + "total": 0.003744 + }, + "sirius::Periodic_function::add": { + "avg": 6.158333333333332e-05, + "count": 24, + "max": 7.6e-05, + "min": 5e-05, + "total": 0.0014779999999999997 + }, + "sirius::Periodic_function::inner": { + "avg": 6.942268041237117e-05, + "count": 97, + "max": 0.000116, + "min": 5.7e-05, + "total": 0.006734000000000003 + }, + "sirius::Periodic_function::integrate": { + "avg": 6.716666666666667e-05, + "count": 12, + "max": 0.000122, + "min": 5.2e-05, + "total": 0.000806 + }, + "sirius::Potential::Potential": { + "avg": 
0.003107, + "count": 1, + "max": 0.003107, + "min": 0.003107, + "total": 0.003107 + }, + "sirius::Potential::generate": { + "avg": 0.01434075, + "count": 12, + "max": 0.015049, + "min": 0.01379, + "total": 0.172089 + }, + "sirius::Potential::generate_D_operator_matrix": { + "avg": 0.0011324166666666666, + "count": 12, + "max": 0.001187, + "min": 0.001101, + "total": 0.013588999999999999 + }, + "sirius::Potential::generate_PAW_effective_potential": { + "avg": 0.009657, + "count": 12, + "max": 0.01019, + "min": 0.00906, + "total": 0.115884 + }, + "sirius::Potential::generate_local_potential": { + "avg": 0.001472, + "count": 1, + "max": 0.001472, + "min": 0.001472, + "total": 0.001472 + }, + "sirius::Potential::poisson": { + "avg": 0.0006279166666666667, + "count": 12, + "max": 0.000772, + "min": 0.000582, + "total": 0.007535 + }, + "sirius::Potential::update": { + "avg": 0.001487, + "count": 1, + "max": 0.001487, + "min": 0.001487, + "total": 0.001487 + }, + "sirius::Potential::xc": { + "avg": 0.002344, + "count": 12, + "max": 0.003205, + "min": 0.002085, + "total": 0.028128 + }, + "sirius::Potential::xc_mt_nonmagnetic": { + "avg": 0.0005324374999999998, + "count": 48, + "max": 0.000582, + "min": 0.000456, + "total": 0.025556999999999993 + }, + "sirius::Potential::xc_rg_nonmagnetic": { + "avg": 0.0023396666666666674, + "count": 12, + "max": 0.003196, + "min": 0.002082, + "total": 0.028076000000000007 + }, + "sirius::Radial_integrals|atomic_centered_wfc": { + "avg": 0.064, + "count": 2, + "max": 0.065128, + "min": 0.062872, + "total": 0.128 + }, + "sirius::Radial_integrals|aug": { + "avg": 0.800689, + "count": 2, + "max": 0.846618, + "min": 0.75476, + "total": 1.601378 + }, + "sirius::Radial_integrals|beta": { + "avg": 0.10790749999999999, + "count": 2, + "max": 0.112341, + "min": 0.103474, + "total": 0.21581499999999998 + }, + "sirius::Radial_integrals|rho_core_pseudo": { + "avg": 0.043328, + "count": 2, + "max": 0.047735, + "min": 0.038921, + "total": 0.086656 + }, 
+ "sirius::Radial_integrals|rho_pseudo": { + "avg": 0.035186, + "count": 1, + "max": 0.035186, + "min": 0.035186, + "total": 0.035186 + }, + "sirius::Radial_integrals|vloc": { + "avg": 0.14024799999999998, + "count": 2, + "max": 0.149245, + "min": 0.131251, + "total": 0.28049599999999997 + }, + "sirius::Simulation_context::init_atoms_to_grid_idx": { + "avg": 0.001218, + "count": 1, + "max": 0.001218, + "min": 0.001218, + "total": 0.001218 + }, + "sirius::Simulation_context::init_comm": { + "avg": 0.00024, + "count": 1, + "max": 0.00024, + "min": 0.00024, + "total": 0.00024 + }, + "sirius::Simulation_context::init_fft": { + "avg": 0.018381, + "count": 1, + "max": 0.018381, + "min": 0.018381, + "total": 0.018381 + }, + "sirius::Simulation_context::initialize": { + "avg": 2.423359, + "count": 1, + "max": 2.423359, + "min": 2.423359, + "total": 2.423359 + }, + "sirius::Simulation_context::make_periodic_function": { + "avg": 0.00018166666666666667, + "count": 6, + "max": 0.000241, + "min": 0.00011, + "total": 0.00109 + }, + "sirius::Simulation_context::update": { + "avg": 0.015984, + "count": 1, + "max": 0.015984, + "min": 0.015984, + "total": 0.015984 + }, + "sirius::Simulation_parameters::import": { + "avg": 0.000179, + "count": 1, + "max": 0.000179, + "min": 0.000179, + "total": 0.000179 + }, + "sirius::Smooth_periodic_function::fft_transform": { + "avg": 0.00038875555555555546, + "count": 90, + "max": 0.001224, + "min": 0.000169, + "total": 0.03498799999999999 + }, + "sirius::Smooth_periodic_function::gather_f_pw": { + "avg": 1.65e-05, + "count": 2, + "max": 1.8e-05, + "min": 1.5e-05, + "total": 3.3e-05 + }, + "sirius::Smooth_periodic_function|inner": { + "avg": 6.619548872180451e-05, + "count": 133, + "max": 0.000114, + "min": 5.4e-05, + "total": 0.008804 + }, + "sirius::Stress|ewald": { + "avg": 0.000535, + "count": 1, + "max": 0.000535, + "min": 0.000535, + "total": 0.000535 + }, + "sirius::Stress|har": { + "avg": 0.000189, + "count": 1, + "max": 0.000189, + 
"min": 0.000189, + "total": 0.000189 + }, + "sirius::Stress|kin": { + "avg": 0.000122, + "count": 1, + "max": 0.000122, + "min": 0.000122, + "total": 0.000122 + }, + "sirius::Stress|nonloc": { + "avg": 0.00131, + "count": 1, + "max": 0.00131, + "min": 0.00131, + "total": 0.00131 + }, + "sirius::Stress|us": { + "avg": 0.041397, + "count": 1, + "max": 0.041397, + "min": 0.041397, + "total": 0.041397 + }, + "sirius::Stress|us|gemm": { + "avg": 0.00032205555555555557, + "count": 18, + "max": 0.000412, + "min": 0.000268, + "total": 0.005797 + }, + "sirius::Stress|us|phase_fac": { + "avg": 7.7e-05, + "count": 2, + "max": 8e-05, + "min": 7.4e-05, + "total": 0.000154 + }, + "sirius::Stress|us|prepare": { + "avg": 8.861111111111113e-05, + "count": 18, + "max": 0.000122, + "min": 7.2e-05, + "total": 0.0015950000000000003 + }, + "sirius::Stress|vloc": { + "avg": 0.000552, + "count": 1, + "max": 0.000552, + "min": 0.000552, + "total": 0.000552 + }, + "sirius::Unit_cell::find_nearest_neighbours": { + "avg": 0.0002685, + "count": 2, + "max": 0.000364, + "min": 0.000173, + "total": 0.000537 + }, + "sirius::Unit_cell::get_symmetry": { + "avg": 0.0018449999999999999, + "count": 2, + "max": 0.001929, + "min": 0.001761, + "total": 0.0036899999999999997 + }, + "sirius::Unit_cell::initialize": { + "avg": 0.040512, + "count": 1, + "max": 0.040512, + "min": 0.040512, + "total": 0.040512 + }, + "sirius::Unit_cell::update": { + "avg": 0.0021219999999999998, + "count": 2, + "max": 0.002303, + "min": 0.001941, + "total": 0.0042439999999999995 + }, + "sirius::Unit_cell_symmetry::Unit_cell_symmetry": { + "avg": 0.001756, + "count": 2, + "max": 0.001823, + "min": 0.001689, + "total": 0.003512 + }, + "sirius::Unit_cell_symmetry::Unit_cell_symmetry|spg": { + "avg": 0.0016385, + "count": 2, + "max": 0.0017, + "min": 0.001577, + "total": 0.003277 + }, + "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym1": { + "avg": 1.35e-05, + "count": 2, + "max": 2.4e-05, + "min": 3e-06, + "total": 2.7e-05 + }, 
+ "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym2": { + "avg": 8.55e-05, + "count": 2, + "max": 8.6e-05, + "min": 8.5e-05, + "total": 0.000171 + }, + "sirius::Unit_cell_symmetry::Unit_cell_symmetry|sym3": { + "avg": 9e-06, + "count": 2, + "max": 1.6e-05, + "min": 2e-06, + "total": 1.8e-05 + } + } +} \ No newline at end of file